File: StringCopyPaste\StringCopyPasteHelpers.cs
Web Access
Project: ..\..\..\src\EditorFeatures\CSharp\Microsoft.CodeAnalysis.CSharp.EditorFeatures.csproj (Microsoft.CodeAnalysis.CSharp.EditorFeatures)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Collections.Immutable;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Linq;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.CSharp.Extensions;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using Microsoft.CodeAnalysis.Editor.Shared.Extensions;
using Microsoft.CodeAnalysis.InheritanceMargin;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Shared.Collections;
using Microsoft.CodeAnalysis.Text;
using Microsoft.CodeAnalysis.Text.Shared.Extensions;
using Microsoft.VisualStudio.Text;
using Roslyn.Utilities;
 
namespace Microsoft.CodeAnalysis.Editor.CSharp.StringCopyPaste
{
    internal static class StringCopyPasteHelpers
    {
        public static bool HasNewLine(TextLine line)
            => line.Span.End != line.SpanIncludingLineBreak.End;
 
        /// <summary>
        /// Gets the character at the requested position, or <c>\0</c> if out of bounds. 
        /// </summary>
        public static char SafeCharAt(SourceText text, int index)
            => index >= 0 && index < text.Length ? text[index] : '\0';
 
        /// <summary>
        /// True if the string literal contains an error diagnostic that indicates a parsing problem with it. For
        /// interpolated strings, this only includes the text sections, and not any interpolation holes in the literal.
        /// </summary>
        public static bool ContainsError(ExpressionSyntax stringExpression)
        {
            if (stringExpression is LiteralExpressionSyntax)
                return NodeOrTokenContainsError(stringExpression);
 
            if (stringExpression is InterpolatedStringExpressionSyntax interpolatedString)
            {
                using var _ = PooledHashSet<Diagnostic>.GetInstance(out var errors);
                foreach (var diagnostic in interpolatedString.GetDiagnostics())
                {
                    if (diagnostic.Severity == DiagnosticSeverity.Error)
                        errors.Add(diagnostic);
                }
 
                // we don't care about errors in holes.  Only errors in the content portions of the string.
                for (int i = 0, n = interpolatedString.Contents.Count; i < n && errors.Count > 0; i++)
                {
                    if (interpolatedString.Contents[i] is InterpolatedStringTextSyntax text)
                    {
                        foreach (var diagnostic in text.GetDiagnostics())
                            errors.Remove(diagnostic);
                    }
                }
 
                return errors.Count > 0;
            }
 
            throw ExceptionUtilities.UnexpectedValue(stringExpression);
        }
 
        public static bool NodeOrTokenContainsError(SyntaxNodeOrToken nodeOrToken)
        {
            foreach (var diagnostic in nodeOrToken.GetDiagnostics())
            {
                if (diagnostic.Severity == DiagnosticSeverity.Error)
                    return true;
            }
 
            return false;
        }
 
        public static bool AllWhitespace(INormalizedTextChangeCollection changes)
        {
            foreach (var change in changes)
            {
                if (!AllWhitespace(change.NewText))
                    return false;
            }
 
            return true;
        }
 
        private static bool AllWhitespace(string text)
        {
            foreach (var ch in text)
            {
                if (!SyntaxFacts.IsWhitespace(ch))
                    return false;
            }
 
            return true;
        }
 
        /// <summary>
        /// Given a TextLine, returns the index (in the SourceText) of the first character of it that is not a
        /// Whitespace character.  The LineBreak parts of the line are not considered here.  If the line is empty/blank
        /// (again, not counting LineBreak characters) then -1 is returned.
        /// </summary>
        public static int GetFirstNonWhitespaceIndex(SourceText text, TextLine line)
        {
            for (int i = line.Start, n = line.End; i < n; i++)
            {
                if (!SyntaxFacts.IsWhitespace(text[i]))
                    return i;
            }
 
            return -1;
        }
 
        public static bool ContainsControlCharacter(INormalizedTextChangeCollection changes)
        {
            foreach (var change in changes)
            {
                if (ContainsControlCharacter(change.NewText))
                    return true;
            }
 
            return false;
        }
 
        public static bool ContainsControlCharacter(string newText)
        {
            foreach (var c in newText)
            {
                if (char.IsControl(c))
                    return true;
            }
 
            return false;
        }
 
        /// <summary>
        /// Removes all characters matching <see cref="SyntaxFacts.IsWhitespace(char)"/> from the start of <paramref
        /// name="value"/>.
        /// </summary>
        public static (string whitespace, string contents) ExtractWhitespace(string value)
        {
            var start = 0;
            while (start < value.Length && SyntaxFacts.IsWhitespace(value[start]))
                start++;
 
            return (value[..start], value[start..]);
        }
 
        public static bool IsVerbatimStringExpression(ExpressionSyntax stringExpression)
            => stringExpression is LiteralExpressionSyntax literalExpression && literalExpression.Token.IsVerbatimStringLiteral() ||
               stringExpression is InterpolatedStringExpressionSyntax { StringStartToken.RawKind: (int)SyntaxKind.InterpolatedVerbatimStringStartToken };
 
        public static bool IsAnyRawStringExpression(ExpressionSyntax expression)
            => expression is LiteralExpressionSyntax literal
                ? IsRawStringLiteral(literal)
                : IsRawStringLiteral((InterpolatedStringExpressionSyntax)expression);
 
        public static bool IsAnyMultiLineRawStringExpression(ExpressionSyntax expression)
            => expression is LiteralExpressionSyntax { Token.RawKind: (int)SyntaxKind.MultiLineRawStringLiteralToken } or
                             InterpolatedStringExpressionSyntax { StringStartToken.RawKind: (int)SyntaxKind.InterpolatedMultiLineRawStringStartToken };
 
        public static bool IsRawStringLiteral(InterpolatedStringExpressionSyntax interpolatedString)
            => interpolatedString.StringStartToken.Kind() is SyntaxKind.InterpolatedSingleLineRawStringStartToken or SyntaxKind.InterpolatedMultiLineRawStringStartToken;
 
        public static bool IsRawStringLiteral(LiteralExpressionSyntax literal)
            => literal.Token.Kind() is SyntaxKind.SingleLineRawStringLiteralToken or SyntaxKind.MultiLineRawStringLiteralToken;
 
        public static int SkipU8Suffix(SourceText text, int end)
        {
            if (SafeCharAt(text, end - 1) == '8')
                end--;
            if (SafeCharAt(text, end - 1) is 'u' or 'U')
                end--;
            return end;
        }
 
        /// <summary>
        /// Given a section of a document, finds the longest sequence of quote (<c>"</c>) characters in it.  Used to
        /// determine if a raw string literal needs to grow its delimiters to ensure that the quote sequence will no
        /// longer be a problem.
        /// </summary>
        public static int GetLongestQuoteSequence(SourceText text, TextSpan span)
            => GetLongestCharacterSequence(text, span, '"');
 
        public static int GetLongestOpenBraceSequence(SourceText text, TextSpan span)
            => GetLongestCharacterSequence(text, span, '{');
 
        public static int GetLongestCloseBraceSequence(SourceText text, TextSpan span)
            => GetLongestCharacterSequence(text, span, '}');
 
        /// <summary>
        /// Given a section of a document, finds the longest sequence of of a given <paramref name="character"/> in it.
        /// Used to determine if a raw string literal needs to grow its delimiters to ensure that the sequence
        /// will no longer be a problem.
        /// </summary>
        private static int GetLongestCharacterSequence(SourceText text, TextSpan span, char character)
        {
            var longestCount = 0;
            for (int currentIndex = span.Start, contentEnd = span.End; currentIndex < contentEnd;)
            {
                if (text[currentIndex] == character)
                {
                    var endQuoteIndex = currentIndex;
                    while (endQuoteIndex < contentEnd && text[endQuoteIndex] == character)
                        endQuoteIndex++;
 
                    longestCount = Math.Max(longestCount, endQuoteIndex - currentIndex);
                    currentIndex = endQuoteIndex;
                }
                else
                {
                    currentIndex++;
                }
            }
 
            return longestCount;
        }
 
        /// <summary>
        /// Given a set of selections, finds the innermost string-literal/interpolation that they are all contained in.
        /// If no such literal/interpolation exists, this returns null. 
        /// </summary>
        public static ExpressionSyntax? FindCommonContainingStringExpression(
            SyntaxNode root, NormalizedSnapshotSpanCollection selectionsBeforePaste)
        {
            ExpressionSyntax? expression = null;
            foreach (var snapshotSpan in selectionsBeforePaste)
            {
                var container = FindContainingSupportedStringExpression(root, snapshotSpan.Start.Position);
                if (container == null)
                    return null;
 
                expression ??= container;
                if (expression != container)
                    return null;
            }
 
            return expression;
        }
 
        public static ExpressionSyntax? FindContainingSupportedStringExpression(SyntaxNode root, int position)
        {
            var node = root.FindToken(position).Parent;
            for (var current = node; current != null; current = current.Parent)
            {
                if (current is LiteralExpressionSyntax literalExpression)
                    return IsSupportedStringExpression(literalExpression) ? literalExpression : null;
 
                if (current is InterpolatedStringExpressionSyntax interpolatedString)
                    return IsSupportedStringExpression(interpolatedString) ? interpolatedString : null;
            }
 
            return null;
        }
 
        public static bool IsSupportedStringExpression(ExpressionSyntax expression)
        {
            // When new string forms are added, support for them can be introduced here.  However, by checking the exact
            // types of strings supported, downstream code can know exactly what forms they should be looking for and
            // that nothing else may flow down to them.
 
            if (expression is LiteralExpressionSyntax
                {
                    RawKind: (int)SyntaxKind.StringLiteralExpression,
                    Token.RawKind: (int)SyntaxKind.StringLiteralToken or
                                   (int)SyntaxKind.SingleLineRawStringLiteralToken or
                                   (int)SyntaxKind.MultiLineRawStringLiteralToken,
                })
            {
                return true;
            }
 
            if (expression is InterpolatedStringExpressionSyntax
                {
                    StringStartToken.RawKind: (int)SyntaxKind.InterpolatedStringStartToken or
                                              (int)SyntaxKind.InterpolatedVerbatimStringStartToken or
                                              (int)SyntaxKind.InterpolatedSingleLineRawStringStartToken or
                                              (int)SyntaxKind.InterpolatedMultiLineRawStringStartToken,
                })
            {
                return true;
            }
 
            return false;
        }
 
        public static string EscapeForNonRawStringLiteral_DoNotCallDirectly(bool isVerbatim, bool isInterpolated, bool trySkipExistingEscapes, string value)
        {
            if (isVerbatim)
                return EscapeForNonRawVerbatimStringLiteral(isInterpolated, trySkipExistingEscapes, value);
 
            // Standard strings have a much larger set of cases to consider.
            using var _ = PooledStringBuilder.GetInstance(out var builder);
 
            // taken from object-display
            for (var i = 0; i < value.Length; i++)
            {
                var ch = value[i];
                var nextCh = i == value.Length - 1 ? 0 : value[i + 1];
 
                if (CharUnicodeInfo.GetUnicodeCategory(ch) == UnicodeCategory.Surrogate)
                {
                    var category = CharUnicodeInfo.GetUnicodeCategory(value, i);
                    if (category == UnicodeCategory.Surrogate)
                    {
                        // an unpaired surrogate
                        builder.Append("\\u" + ((int)ch).ToString("x4"));
                    }
                    else if (NeedsEscaping(category))
                    {
                        // a surrogate pair that needs to be escaped
                        var unicode = char.ConvertToUtf32(value, i);
                        builder.Append("\\U" + unicode.ToString("x8"));
                        i++; // skip the already-encoded second surrogate of the pair
                    }
                    else
                    {
                        // copy a printable surrogate pair directly
                        builder.Append(ch);
                        builder.Append(value[++i]);
                    }
                }
                else if (TryReplaceChar(ch, out var replaceWith))
                {
                    builder.Append(replaceWith);
                }
                else
                {
                    builder.Append(ch);
 
                    // if we see a special character then skip the following one if the following one already escapes it.
                    // Otherwise, if it's not already escaped, then escape it.
                    if (isInterpolated && ch is '{' or '}')
                    {
                        if (trySkipExistingEscapes && nextCh == ch)
                            i++;
                        else
                            builder.Append(ch);
                    }
                }
            }
 
            return builder.ToString();
 
            static bool TryReplaceChar(char c, [NotNullWhen(true)] out string? replaceWith)
            {
                replaceWith = null;
                switch (c)
                {
                    case '\\':
                        replaceWith = "\\\\";
                        break;
                    case '\0':
                        replaceWith = "\\0";
                        break;
                    case '\a':
                        replaceWith = "\\a";
                        break;
                    case '\b':
                        replaceWith = "\\b";
                        break;
                    case '\f':
                        replaceWith = "\\f";
                        break;
                    case '\n':
                        replaceWith = "\\n";
                        break;
                    case '\r':
                        replaceWith = "\\r";
                        break;
                    case '\t':
                        replaceWith = "\\t";
                        break;
                    case '\v':
                        replaceWith = "\\v";
                        break;
                    case '"':
                        replaceWith = "\\\"";
                        break;
                }
 
                if (replaceWith != null)
                    return true;
 
                if (NeedsEscaping(CharUnicodeInfo.GetUnicodeCategory(c)))
                {
                    replaceWith = "\\u" + ((int)c).ToString("x4");
                    return true;
                }
 
                return false;
            }
 
            static bool NeedsEscaping(UnicodeCategory category)
            {
                switch (category)
                {
                    case UnicodeCategory.Control:
                    case UnicodeCategory.OtherNotAssigned:
                    case UnicodeCategory.ParagraphSeparator:
                    case UnicodeCategory.LineSeparator:
                    case UnicodeCategory.Surrogate:
                        return true;
                    default:
                        return false;
                }
            }
        }
 
        private static string EscapeForNonRawVerbatimStringLiteral(bool isInterpolated, bool trySkipExistingEscapes, string value)
        {
            using var _ = PooledStringBuilder.GetInstance(out var builder);
 
            // First, go through and see if we're escaping *anything* in the original.  If so, then we'll escape
            // everything.  In other words, say we're pasting `[SuppressMessage("", "CA2013")]`.  We technically don't
            // need to escape the `""` (since that is legal in a verbatim string).  However, we will be escaping the
            // quotes in teh `"CA2013"` to become `""CA2013""`.  Once we decide we're escaping some quotes, we should
            // then realize that we *should* escape the `""` to `""""` to be consistent.
 
            // So if we determine that we will be escaping all code, then just recurse, this time setting
            // trySkipExistingEscapes to false.  That will prevent calling back into this check and it means whatever we
            // run into we will escape.
            if (trySkipExistingEscapes && WillEscapeAnyCharacters(isInterpolated, value))
                return EscapeForNonRawVerbatimStringLiteral(isInterpolated, trySkipExistingEscapes: false, value);
 
            for (var i = 0; i < value.Length; i++)
            {
                var ch = value[i];
                var nextCh = i == value.Length - 1 ? 0 : value[i + 1];
 
                builder.Append(ch);
 
                // if we see a special character then skip the following one if the following one already escapes it.
                // Otherwise, if it's not already escaped, then escape it.
                if (ch == '"')
                {
                    builder.Append(ch);
 
                    if (trySkipExistingEscapes && nextCh == ch)
                        i++;
                }
                else if (isInterpolated && ch is '{' or '}')
                {
                    builder.Append(ch);
 
                    if (trySkipExistingEscapes && nextCh == ch)
                        i++;
                }
            }
 
            return builder.ToString();
 
            static bool WillEscapeAnyCharacters(bool isInterpolated, string value)
            {
                for (var i = 0; i < value.Length; i++)
                {
                    var ch = value[i];
                    var nextCh = i == value.Length - 1 ? 0 : value[i + 1];
 
                    if (ch == '"')
                    {
                        // we have an isolated quote.  we will need to escape it (and thus should escape everything in
                        // the string.
                        if (nextCh != ch)
                            return true;
 
                        // Quotes are paired.  This is already escaped fine.  Skip both quotes.
                        i++;
                    }
                    else if (isInterpolated && ch is '{' or '}')
                    {
                        // we have an isolated brace.  we will need to escape it (and thus should escape everything in
                        // the string.
                        if (nextCh != ch)
                            return true;
 
                        // Braces are paired.  This is already escaped fine.  Skip both braces.
                        i++;
                    }
 
                    // continue looking forward.
                }
 
                return false;
            }
        }
 
        /// <summary>
        /// Given a set of source text lines, determines what common whitespace prefix each line has.  Note that this
        /// does *not* include the first line as it's super common for someone to copy a set of lines while only
        /// starting the selection at the start of the content on the first line.  This also does not include empty
        /// lines as they're also very common, but are clearly not a way of indicating indentation indent for the normal
        /// lines.
        /// </summary>
        public static string? GetCommonIndentationPrefix(INormalizedTextChangeCollection textChanges)
        {
            string? commonIndentPrefix = null;
            var first = true;
 
            foreach (var change in textChanges)
            {
                var text = SourceText.From(change.NewText);
                foreach (var line in text.Lines)
                {
                    if (first)
                    {
                        first = false;
                        continue;
                    }
 
                    var nonWhitespaceIndex = GetFirstNonWhitespaceIndex(text, line);
                    if (nonWhitespaceIndex >= 0)
                        commonIndentPrefix = GetCommonIndentationPrefix(commonIndentPrefix, text, TextSpan.FromBounds(line.Start, nonWhitespaceIndex));
                }
            }
 
            return commonIndentPrefix;
        }
 
        private static string? GetCommonIndentationPrefix(string? commonIndentPrefix, SourceText text, TextSpan lineWhitespaceSpan)
        {
            // first line with indentation whitespace we're seeing.  Just keep track of that.
            if (commonIndentPrefix == null)
                return text.ToString(lineWhitespaceSpan);
 
            // we have indentation whitespace from a previous line.  Figure out the max commonality between it and the
            // line we're currently looking at.
            var commonPrefixLength = 0;
            for (var n = Math.Min(commonIndentPrefix.Length, lineWhitespaceSpan.Length); commonPrefixLength < n; commonPrefixLength++)
            {
                if (commonIndentPrefix[commonPrefixLength] != text[lineWhitespaceSpan.Start + commonPrefixLength])
                    break;
            }
 
            return commonIndentPrefix[..commonPrefixLength];
        }
 
        public static TextSpan MapSpan(TextSpan span, ITextSnapshot from, ITextSnapshot to)
            => from.CreateTrackingSpan(span.ToSpan(), SpanTrackingMode.EdgeInclusive).GetSpan(to).Span.ToTextSpan();
 
        public static bool RawContentMustBeMultiLine(SourceText text, ImmutableArray<TextSpan> spans)
        {
            Contract.ThrowIfTrue(spans.Length == 0);
 
            // Empty raw string must be multiline.
            if (spans is [{ IsEmpty: true }])
                return true;
 
            // Or if it starts/ends with a quote 
            if (spans.First().Length > 0 && text[spans.First().Start] == '"')
                return true;
 
            if (spans.Last().Length > 0 && text[spans.Last().End - 1] == '"')
                return true;
 
            // or contains a newline
            foreach (var span in spans)
            {
                for (var i = span.Start; i < span.End; i++)
                {
                    if (SyntaxFacts.IsNewLine(text[i]))
                        return true;
                }
            }
 
            return false;
        }
    }
}