// File: AbstractVirtualCharService.cs
// Web Access
// Project: ..\..\..\src\Workspaces\Core\Portable\Microsoft.CodeAnalysis.Workspaces.csproj (Microsoft.CodeAnalysis.Workspaces)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Diagnostics;
using System.Text;
using Microsoft.CodeAnalysis.Collections;
using Microsoft.CodeAnalysis.LanguageService;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Text;
 
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars
{
    internal abstract partial class AbstractVirtualCharService : IVirtualCharService
    {
        /// <summary>
        /// Language-specific lookup that reports, through <paramref name="escapedChar"/>, an escape character
        /// associated with <paramref name="ch"/>.  NOTE(review): the exact mapping and the meaning of the
        /// <see langword="bool"/> result are defined by the implementing subclasses and are not visible in this
        /// file; presumably <see langword="true"/> means an escape exists. Confirm against the implementations.
        /// </summary>
        public abstract bool TryGetEscapeCharacter(VirtualChar ch, out char escapedChar);
 
        /// <summary>
        /// Language-specific syntax facts.  In this part of the class it is only read to obtain the
        /// <c>SyntaxKinds</c> consulted by the debug-only invariant checks.
        /// </summary>
        protected abstract ISyntaxFacts SyntaxFacts { get; }
 
        /// <summary>
        /// Language-specific conversion of <paramref name="token"/> into its virtual characters.  It is invoked
        /// by <see cref="TryConvertToVirtualChars"/> only for tokens that do not contain diagnostics, and its
        /// result (including a <see langword="default"/> sequence) is handed back to the caller unchanged.
        /// </summary>
        protected abstract VirtualCharSequence TryConvertToVirtualCharsWorker(SyntaxToken token);

        /// <summary>
        /// Reports whether <paramref name="token"/> is a multi-line raw string token.  The debug-only invariant
        /// checks use the answer to decide whether consecutive virtual character spans merely have to be ordered
        /// (when this returns <see langword="true"/>) or have to touch each other (otherwise).
        /// </summary>
        protected abstract bool IsMultiLineRawStringToken(SyntaxToken token);
 
        /// <summary>
        /// Determines whether <paramref name="tokenText"/> contains an escaped brace (<c>{{</c> or <c>}}</c>)
        /// beginning at <paramref name="index"/>.
        /// </summary>
        /// <param name="tokenText">The text being scanned.</param>
        /// <param name="index">Position in <paramref name="tokenText"/> of the first character of the candidate
        /// pair.</param>
        /// <param name="offset">Value added to <paramref name="index"/> to form the start of
        /// <paramref name="span"/>.</param>
        /// <param name="span">On success, the two-character span starting at <c>offset + index</c>; otherwise
        /// <see langword="default"/>.</param>
        /// <returns><see langword="true"/> if two characters are available at <paramref name="index"/> and they
        /// are the same brace character; otherwise <see langword="false"/>.</returns>
        protected static bool IsLegalBraceEscape(
            string tokenText, int index, int offset, out TextSpan span)
        {
            var isEscapedBrace = false;

            // A pair needs two characters, so only look when the second one is still inside the text.
            var secondIndex = index + 1;
            if (secondIndex < tokenText.Length)
            {
                var first = tokenText[index];

                // An escape is the same brace character written twice in a row.
                isEscapedBrace = first == tokenText[secondIndex] && (first is '{' or '}');
            }

            span = isEscapedBrace ? new TextSpan(offset + index, 2) : default;
            return isEscapedBrace;
        }
 
        /// <summary>
        /// Converts <paramref name="token"/> into its sequence of virtual characters by delegating to
        /// <see cref="TryConvertToVirtualCharsWorker"/>.
        /// </summary>
        /// <param name="token">The token to convert.</param>
        /// <returns>The sequence produced by the worker, or <see langword="default"/> when
        /// <paramref name="token"/> contains diagnostics.</returns>
        public VirtualCharSequence TryConvertToVirtualChars(SyntaxToken token)
        {
            // Only tokens free of diagnostics are processed.  That lets the conversion trust that the
            // contents of the string (most importantly, its escape sequences) are well formed.
            if (!token.ContainsDiagnostics)
            {
                var virtualChars = TryConvertToVirtualCharsWorker(token);
                CheckInvariants(token, virtualChars);

                return virtualChars;
            }

            return default;
        }
 
        /// <summary>
        /// Debug-only sanity checks verifying that <paramref name="result"/> breaks <paramref name="token"/>
        /// up the same way the C# and VB compilers did.  Calls to this method are only compiled in when
        /// <c>DEBUG</c> is defined.
        /// </summary>
        /// <param name="token">The token that was converted.</param>
        /// <param name="result">The sequence produced for <paramref name="token"/>; a default sequence is not
        /// checked.</param>
        [Conditional("DEBUG")]
        private void CheckInvariants(SyntaxToken token, VirtualCharSequence result)
        {
            // Nothing to validate when the conversion did not produce a sequence.
            if (result.IsDefault)
            {
                return;
            }

            var kinds = this.SyntaxFacts.SyntaxKinds;
            var rawKind = token.RawKind;
            var isStringOrCharLiteral = rawKind == kinds.StringLiteralToken ||
                                        rawKind == kinds.CharacterLiteralToken;
            var isUtf8StringLiteral = rawKind == kinds.Utf8StringLiteralToken;

            // The characters we produced must spell out exactly the value the compiler computed.
            if (isStringOrCharLiteral || isUtf8StringLiteral)
            {
                var expectedValueText = token.ValueText;
                var actualValueText = result.CreateString();
                Debug.Assert(expectedValueText == actualValueText);
            }

            // The remaining checks are about spans, which only exist when there is at least one character.
            if (result.Length == 0)
            {
                return;
            }

            var previousVC = result[0];
            Debug.Assert(previousVC.Span.Start >= token.SpanStart, "First span has to start after the start of the string token");
            if (isStringOrCharLiteral)
            {
                Debug.Assert(previousVC.Span.Start == token.SpanStart + 1 ||
                             previousVC.Span.Start == token.SpanStart + 2, "First span should start on the second or third char of the string.");
            }

            // Multi-line raw strings may have gaps between consecutive spans, so those only need to be
            // ordered.  Every other kind of token must have spans that touch.
            var gapsAllowed = IsMultiLineRawStringToken(token);
            for (var i = 1; i < result.Length; i++)
            {
                var currentVC = result[i];
                if (gapsAllowed)
                {
                    Debug.Assert(previousVC.Span.End <= currentVC.Span.Start, "Virtual character spans have to be ordered.");
                }
                else
                {
                    Debug.Assert(previousVC.Span.End == currentVC.Span.Start, "Virtual character spans have to be touching.");
                }

                previousVC = currentVC;
            }

            var lastVC = result.Last();

            // The final character has to end right where the closing delimiter begins.
            if (isStringOrCharLiteral)
            {
                Debug.Assert(lastVC.Span.End == token.Span.End - "\"".Length, "Last span has to end right before the end of the string token.");
            }
            else if (isUtf8StringLiteral)
            {
                Debug.Assert(lastVC.Span.End == token.Span.End - "\"u8".Length, "Last span has to end right before the end of the string token.");
            }
        }
 
        /// <summary>
        /// Helper to convert simple string literals that escape quotes by doubling them.  This is
        /// how normal VB literals and C# verbatim string literals work.
        /// </summary>
        /// <param name="token">The token whose text is converted.  It must not contain diagnostics (asserted in
        /// debug builds).</param>
        /// <param name="startDelimiter">The start characters string.  " in VB and @" in C#</param>
        /// <param name="endDelimiter">The characters expected at the end of the token text.  They are excluded
        /// from the converted contents.</param>
        /// <param name="escapeBraces">When <see langword="true"/>, <c>{{</c> and <c>}}</c> are each converted to
        /// a single brace character, and a brace that is not part of such a pair makes the conversion fail.
        /// Both delimiters are expected to be empty in this mode (asserted in debug builds).</param>
        /// <returns>The virtual characters for the text between the delimiters, or <see langword="default"/>
        /// if the text does not start/end with the given delimiters or contains an illegal brace
        /// escape.</returns>
        protected static VirtualCharSequence TryConvertSimpleDoubleQuoteString(
            SyntaxToken token, string startDelimiter, string endDelimiter, bool escapeBraces)
        {
            Debug.Assert(!token.ContainsDiagnostics);

            if (escapeBraces)
            {
                Debug.Assert(startDelimiter == "");
                Debug.Assert(endDelimiter == "");
            }

            var tokenText = token.Text;

            if (startDelimiter.Length > 0 && !tokenText.StartsWith(startDelimiter, StringComparison.Ordinal))
            {
                Debug.Assert(false, "This should not be reachable as long as the compiler added no diagnostics.");
                return default;
            }

            if (endDelimiter.Length > 0 && !tokenText.EndsWith(endDelimiter, StringComparison.Ordinal))
            {
                Debug.Assert(false, "This should not be reachable as long as the compiler added no diagnostics.");
                return default;
            }

            var startIndexInclusive = startDelimiter.Length;
            var endIndexExclusive = tokenText.Length - endDelimiter.Length;

            var result = ImmutableSegmentedList.CreateBuilder<VirtualChar>();
            var offset = token.SpanStart;

            for (var index = startIndexInclusive; index < endIndexExclusive;)
            {
                // Guard the look-ahead.  When there is no end delimiter, a quote can be the very last
                // character of the text, and reading the character after it would throw.  Such a lone
                // quote falls through and is converted like any other character, which is also how an
                // unpaired quote anywhere else in the text is handled.
                if (tokenText[index] == '"' &&
                    index + 1 < tokenText.Length &&
                    tokenText[index + 1] == '"')
                {
                    // A doubled quote is a single escaped quote character spanning two source characters.
                    result.Add(VirtualChar.Create(new Rune('"'), new TextSpan(offset + index, 2)));
                    index += 2;
                    continue;
                }
                else if (escapeBraces && IsOpenOrCloseBrace(tokenText[index]))
                {
                    // In this mode a brace is only legal as part of a {{ or }} pair.
                    if (!IsLegalBraceEscape(tokenText, index, offset, out var span))
                        return default;

                    result.Add(VirtualChar.Create(new Rune(tokenText[index]), span));
                    index += span.Length;
                    continue;
                }

                // Ordinary character (or surrogate pair): advance by however many chars were consumed.
                index += ConvertTextAtIndexToRune(tokenText, index, result, offset);
            }

            return CreateVirtualCharSequence(
                tokenText, offset, startIndexInclusive, endIndexExclusive, result);
        }
 
        /// <summary>
        /// Converts the text of <paramref name="tokenText"/> at <paramref name="index"/> into a
        /// <see cref="VirtualChar"/> that is appended to <paramref name="result"/>.  The span of the added
        /// character starts at <c>offset + index</c>.
        /// </summary>
        /// <returns>The number of characters to jump forward (either 1 or 2).</returns>
        protected static int ConvertTextAtIndexToRune(string tokenText, int index, ImmutableSegmentedList<VirtualChar>.Builder result, int offset)
        {
            // Route through the shared generic implementation using the string accessor.
            var textInfo = new StringTextInfo();
            return ConvertTextAtIndexToRune(tokenText, index, textInfo, result, offset);
        }
 
        /// <summary>
        /// Converts the text of <paramref name="tokenText"/> at <paramref name="index"/> into a
        /// <see cref="VirtualChar"/> that is appended to <paramref name="result"/>.  The span of the added
        /// character starts at <c>offset + index</c>.
        /// </summary>
        /// <returns>The number of characters to jump forward (either 1 or 2).</returns>
        protected static int ConvertTextAtIndexToRune(SourceText tokenText, int index, ImmutableSegmentedList<VirtualChar>.Builder result, int offset)
        {
            // Route through the shared generic implementation using the SourceText accessor.
            var textInfo = new SourceTextTextInfo();
            return ConvertTextAtIndexToRune(tokenText, index, textInfo, result, offset);
        }
 
        /// <summary>
        /// Shared implementation that reads the character at <paramref name="index"/> through
        /// <paramref name="info"/> and appends the matching <see cref="VirtualChar"/> to
        /// <paramref name="result"/>.
        /// </summary>
        /// <typeparam name="T">The kind of text being read.</typeparam>
        /// <typeparam name="TTextInfo">Struct accessor used to index into, and get the length of, a
        /// <typeparamref name="T"/>.</typeparam>
        /// <returns>The number of characters consumed: 2 for a surrogate pair, otherwise 1.</returns>
        private static int ConvertTextAtIndexToRune<T, TTextInfo>(
            T tokenText, int index, TTextInfo info, ImmutableSegmentedList<VirtualChar>.Builder result, int offset)
            where TTextInfo : struct, ITextInfo<T>
        {
            var current = info.Get(tokenText, index);

            // The common case: a single char that is a rune all by itself.
            if (Rune.TryCreate(current, out var rune))
            {
                result.Add(VirtualChar.Create(rune, new TextSpan(offset + index, 1)));
                return 1;
            }

            // Less common, but possible: this char and the following one form a surrogate pair.
            var nextIndex = index + 1;
            if (nextIndex < info.Length(tokenText) &&
                Rune.TryCreate(current, info.Get(tokenText, nextIndex), out rune))
            {
                result.Add(VirtualChar.Create(rune, new TextSpan(offset + index, 2)));
                return 2;
            }

            // Neither worked, so this cannot be encoded as a rune.  Keep the raw char instead.
            Debug.Assert(char.IsSurrogate(current));
            result.Add(VirtualChar.Create(current, new TextSpan(offset + index, 1)));
            return 1;
        }
 
        /// <summary>
        /// Returns <see langword="true"/> when <paramref name="ch"/> is <c>{</c> or <c>}</c>.
        /// </summary>
        protected static bool IsOpenOrCloseBrace(char ch)
        {
            return ch == '{' || ch == '}';
        }
 
        /// <summary>
        /// Builds the final <see cref="VirtualCharSequence"/> for the portion of <paramref name="tokenText"/>
        /// between <paramref name="startIndexInclusive"/> and <paramref name="endIndexExclusive"/>.
        /// </summary>
        /// <param name="tokenText">The full text of the token.</param>
        /// <param name="offset">The offset passed along when wrapping <paramref name="tokenText"/>
        /// directly.</param>
        /// <param name="startIndexInclusive">Index in <paramref name="tokenText"/> where the contents
        /// start.</param>
        /// <param name="endIndexExclusive">Index in <paramref name="tokenText"/> just past the end of the
        /// contents.</param>
        /// <param name="result">The virtual characters computed for the contents.</param>
        /// <returns>A sequence wrapping the token text when that is possible; otherwise a sequence backed by
        /// the contents of <paramref name="result"/>.</returns>
        protected static VirtualCharSequence CreateVirtualCharSequence(
            string tokenText, int offset,
            int startIndexInclusive, int endIndexExclusive,
            ImmutableSegmentedList<VirtualChar>.Builder result)
        {
            // If the number of virtual chars differs from the number of source characters, then some
            // special virtual chars were created and the materialized list has to be used.
            var sourceCharCount = endIndexExclusive - startIndexInclusive;
            if (result.Count != sourceCharCount)
                return VirtualCharSequence.Create(result.ToImmutable());

            // Otherwise no special virtual chars were needed.  Skip the allocation and pass back a view
            // over the token's own text instead.
            return VirtualCharSequence
                .Create(offset, tokenText)
                .GetSubSequence(TextSpan.FromBounds(startIndexInclusive, endIndexExclusive));
        }
    }
}