File: EmbeddedLanguages\StackFrame\StackFrameLexer.cs
Web Access
Project: ..\..\..\src\Features\Core\Portable\Microsoft.CodeAnalysis.Features.csproj (Microsoft.CodeAnalysis.Features)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Collections.Immutable;
using System.Diagnostics;
using System.IO;
using Microsoft.CodeAnalysis.EmbeddedLanguages.Common;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Shared.Utilities;
using Microsoft.CodeAnalysis.Text;
using Roslyn.Utilities;
 
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.StackFrame
{
    using StackFrameToken = EmbeddedSyntaxToken<StackFrameKind>;
    using StackFrameTrivia = EmbeddedSyntaxTrivia<StackFrameKind>;
 
    internal struct StackFrameLexer
    {
        /// <summary>
        /// The complete sequence of characters this lexer scans over.
        /// </summary>
        public readonly VirtualCharSequence Text;
        /// <summary>
        /// Index into <see cref="Text"/> of the character that will be scanned next.
        /// Only the lexer itself can move it.
        /// </summary>
        public int Position { get; private set; }
 
        /// <summary>
        /// Builds a lexer over <paramref name="text"/> by wrapping it with
        /// <c>VirtualCharSequence.Create(0, text)</c>.
        /// </summary>
        private StackFrameLexer(string text) : this(VirtualCharSequence.Create(0, text))
        {
        }

        /// <summary>
        /// Builds a lexer over an existing character sequence. Every other member
        /// (including <see cref="Position"/>) starts at its default value.
        /// </summary>
        private StackFrameLexer(VirtualCharSequence text)
            : this()
        {
            Text = text;
        }
 
        /// <summary>
        /// Creates a lexer for <paramref name="text"/>.
        /// </summary>
        /// <returns>
        /// A lexer, or <see langword="null"/> when the text contains a carriage return or a line feed.
        /// </returns>
        public static StackFrameLexer? TryCreate(string text)
        {
            var containsLineBreak = text.IndexOf('\r') >= 0 || text.IndexOf('\n') >= 0;
            if (containsLineBreak)
            {
                return null;
            }

            return new StackFrameLexer(text);
        }
 
        /// <summary>
        /// Creates a lexer for an already-built character sequence.
        /// </summary>
        /// <returns>
        /// A lexer, or <see langword="null"/> when any character in the sequence is a carriage return or a line feed.
        /// </returns>
        public static StackFrameLexer? TryCreate(VirtualCharSequence text)
        {
            foreach (var virtualChar in text)
            {
                if (virtualChar.Value is '\r' or '\n')
                {
                    return null;
                }
            }

            return new StackFrameLexer(text);
        }
 
        /// <summary>
        /// The character at <see cref="Position"/>, or the default <see cref="VirtualChar"/>
        /// once the position has reached the end of <see cref="Text"/>.
        /// </summary>
        public readonly VirtualChar CurrentChar
        {
            get
            {
                if (Position >= Text.Length)
                {
                    return default;
                }

                return Text[Position];
            }
        }
 
        /// <summary>
        /// Returns the characters from <paramref name="start"/> up to the current <see cref="Position"/>.
        /// </summary>
        public readonly VirtualCharSequence GetSubSequenceToCurrentPos(int start)
        {
            return GetSubSequence(start, end: Position);
        }

        /// <summary>
        /// Returns the slice of <see cref="Text"/> described by
        /// <c>TextSpan.FromBounds(start, end)</c>.
        /// </summary>
        public readonly VirtualCharSequence GetSubSequence(int start, int end)
        {
            var span = TextSpan.FromBounds(start, end);
            return Text.GetSubSequence(span);
        }
 
        /// <summary>
        /// Consumes everything from the current position to the end of the text as a single
        /// <see cref="StackFrameKind.SkippedTextTrivia"/>.
        /// </summary>
        /// <returns>
        /// The skipped-text trivia, or <see langword="null"/> when the lexer is already at the end.
        /// </returns>
        public StackFrameTrivia? TryScanRemainingTrivia()
        {
            var skippedStart = Position;
            var textEnd = Text.Length;
            if (skippedStart == textEnd)
            {
                return null;
            }

            Position = textEnd;
            return CreateTrivia(StackFrameKind.SkippedTextTrivia, GetSubSequence(skippedStart, textEnd));
        }
 
        /// <summary>
        /// Scans an identifier at the current position without consuming any surrounding trivia.
        /// </summary>
        public StackFrameToken? TryScanIdentifier()
        {
            return TryScanIdentifier(
                scanAtTrivia: false,
                scanLeadingWhitespace: false,
                scanTrailingWhitespace: false);
        }
 
        /// <summary>
        /// Scans an identifier, optionally collecting "at" trivia and whitespace around it.
        /// </summary>
        /// <param name="scanAtTrivia">Try to consume an "at" keyword before the identifier.</param>
        /// <param name="scanLeadingWhitespace">Try to consume whitespace before the identifier.</param>
        /// <param name="scanTrailingWhitespace">Try to consume whitespace after the identifier.</param>
        /// <returns>
        /// An <see cref="StackFrameKind.IdentifierToken"/> carrying whatever trivia was collected, or
        /// <see langword="null"/> (with <see cref="Position"/> restored) when no identifier starts here.
        /// </returns>
        public StackFrameToken? TryScanIdentifier(bool scanAtTrivia, bool scanLeadingWhitespace, bool scanTrailingWhitespace)
        {
            var rewindPosition = Position;

            // Leading trivia is scanned in a fixed order: the "at" keyword first, then whitespace.
            StackFrameTrivia? atTrivia = null;
            if (scanAtTrivia)
            {
                atTrivia = TryScanAtTrivia();
            }

            StackFrameTrivia? leadingWhitespace = null;
            if (scanLeadingWhitespace)
            {
                leadingWhitespace = TryScanWhiteSpace();
            }

            var identifierStart = Position;
            if (!UnicodeCharacterUtilities.IsIdentifierStartCharacter((char)CurrentChar.Value))
            {
                // Only trivia (if anything) was consumed. Rewind so that the trivia can be
                // picked up some other way by the caller.
                Position = rewindPosition;
                return null;
            }

            while (UnicodeCharacterUtilities.IsIdentifierPartCharacter((char)CurrentChar.Value))
            {
                Position++;
            }

            var identifierSequence = GetSubSequenceToCurrentPos(identifierStart);

            StackFrameTrivia? trailingWhitespace = null;
            if (scanTrailingWhitespace)
            {
                trailingWhitespace = TryScanWhiteSpace();
            }

            var leadingTrivia = CreateTrivia(atTrivia, leadingWhitespace);
            var trailingTrivia = CreateTrivia(trailingWhitespace);

            return CreateToken(StackFrameKind.IdentifierToken, leadingTrivia, identifierSequence, trailingTrivia);
        }
 
        /// <summary>
        /// Builds a one-character token for the character at <see cref="Position"/> without advancing.
        /// At the end of the text an empty <see cref="StackFrameKind.EndOfFrame"/> token is returned instead.
        /// </summary>
        public readonly StackFrameToken CurrentCharAsToken()
        {
            if (Position != Text.Length)
            {
                var singleCharSpan = new TextSpan(Position, 1);
                return CreateToken(GetKind(Text[Position]), Text.GetSubSequence(singleCharSpan));
            }

            return CreateToken(StackFrameKind.EndOfFrame, VirtualCharSequence.Empty);
        }
 
        /// <summary>
        /// Consumes the current character as a token when its kind equals <paramref name="kind"/>.
        /// No trailing whitespace is attached to the token.
        /// </summary>
        /// <returns>
        /// <see langword="true"/> if the position was incremented
        /// </returns>
        public bool ScanCurrentCharAsTokenIfMatch(StackFrameKind kind, out StackFrameToken token)
        {
            return ScanCurrentCharAsTokenIfMatch(kind, scanTrailingWhitespace: false, out token);
        }
 
        /// <summary>
        /// Progress the position by one if the current character
        /// matches the kind, optionally attaching any whitespace that follows
        /// as trailing trivia of the produced token.
        /// </summary>
        /// <param name="kind">The kind the current character must have.</param>
        /// <param name="scanTrailingWhitespace">
        /// When <see langword="true"/>, whitespace after the matched character is consumed and
        /// attached to <paramref name="token"/> as trailing trivia.
        /// </param>
        /// <param name="token">The matched token, or <see langword="default"/> when nothing matched.</param>
        /// <returns>
        /// <see langword="true"/> if the position was incremented
        /// </returns>
        public bool ScanCurrentCharAsTokenIfMatch(StackFrameKind kind, bool scanTrailingWhitespace, out StackFrameToken token)
        {
            // At the end of the text there is no current character: CurrentChar is the default
            // VirtualChar, which GetKind classifies through its fallback arm. Without the bounds
            // check a request for that kind would "match", hand back an EndOfFrame token and push
            // Position past Text.Length, breaking every later bounds comparison.
            if (Position >= Text.Length || GetKind(CurrentChar) != kind)
            {
                token = default;
                return false;
            }

            token = CurrentCharAsToken();
            Position++;

            if (scanTrailingWhitespace)
            {
                token = token.With(trailingTrivia: CreateTrivia(TryScanWhiteSpace()));
            }

            return true;
        }
 
        /// <summary>
        /// Progress the position by one if the kind of the current character
        /// satisfies <paramref name="isMatch"/>.
        /// </summary>
        /// <param name="isMatch">Predicate deciding whether the current character's kind is acceptable.</param>
        /// <param name="token">The matched token, or <see langword="default"/> when nothing matched.</param>
        /// <returns>
        /// <see langword="true"/> if the position was incremented
        /// </returns>
        public bool ScanCurrentCharAsTokenIfMatch(Func<StackFrameKind, bool> isMatch, out StackFrameToken token)
        {
            // At the end of the text there is no current character to match. Without this check a
            // predicate that accepts the kind GetKind assigns to the default VirtualChar would
            // yield an EndOfFrame token and move Position past Text.Length.
            if (Position >= Text.Length || !isMatch(GetKind(CurrentChar)))
            {
                token = default;
                return false;
            }

            token = CurrentCharAsToken();
            Position++;
            return true;
        }
 
        /// <summary>
        /// Tries each known language's "at" keyword in turn and consumes the first one found
        /// at the current position as <see cref="StackFrameKind.AtTrivia"/>.
        /// </summary>
        /// <returns>The trivia, or <see langword="null"/> when no language's keyword matches.</returns>
        public StackFrameTrivia? TryScanAtTrivia()
        {
            foreach (var language in s_languages)
            {
                if (TryScanStringTrivia(language.At, StackFrameKind.AtTrivia) is { } atTrivia)
                {
                    return atTrivia;
                }
            }

            return null;
        }
 
        /// <summary>
        /// Tries each known language's "in" keyword in turn and consumes the first one found
        /// at the current position as <see cref="StackFrameKind.InTrivia"/>.
        /// </summary>
        /// <returns>The trivia, or <see langword="null"/> when no language's keyword matches.</returns>
        public StackFrameTrivia? TryScanInTrivia()
        {
            foreach (var language in s_languages)
            {
                if (TryScanStringTrivia(language.In, StackFrameKind.InTrivia) is { } inTrivia)
                {
                    return inTrivia;
                }
            }

            return null;
        }
 
        /// <summary>
        /// Tries each known language's "line" keyword in turn and consumes the first one found
        /// at the current position as <see cref="StackFrameKind.LineTrivia"/>.
        /// </summary>
        /// <returns>The trivia, or <see langword="null"/> when no language's keyword matches.</returns>
        public StackFrameTrivia? TryScanLineTrivia()
        {
            foreach (var language in s_languages)
            {
                if (TryScanStringTrivia(language.Line, StackFrameKind.LineTrivia) is { } lineTrivia)
                {
                    return lineTrivia;
                }
            }

            return null;
        }
 
        /// <summary>
        /// Attempts to parse <see cref="StackFrameKind.InTrivia"/> followed by a file path, see
        /// https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#file-and-directory-names
        /// Uses <see cref="FileInfo"/> to decide how far the path extends.
        /// </summary>
        /// <returns>
        /// <c>Empty</c> when no "in" keyword is present, <c>Abort</c> when the keyword is present but
        /// no path character could be accepted after it, otherwise a <see cref="StackFrameKind.PathToken"/>
        /// with the "in" keyword as leading trivia.
        /// </returns>
        public Result<StackFrameToken> TryScanPath()
        {
            var inTrivia = TryScanInTrivia();
            if (!inTrivia.HasValue)
            {
                return Result<StackFrameToken>.Empty;
            }

            var pathStart = Position;

            // Grow the candidate path one character at a time. Before accepting a character, look
            // ahead and ask FileInfo whether the path including that character is still acceptable;
            // stop at the first character that makes construction fail.
            //
            // Delegating to FileInfo keeps its complex validation rules out of this lexer. A plain
            // invalid-character check is not enough because validity depends on where in the path a
            // character appears, and deciding whether something really is a file is not this
            // lexer's job. The cost is one new object per pass; if that becomes a problem we can
            // revisit and find a more optimized pattern that still handles all of the edge cases.
            //
            // Example: C:\my\path \:other
            //                      ^-- the ":" breaks the path, but we can't simply break on all ":"
            //                          (which is included in the invalid characters for Path)
            for (; Position < Text.Length; Position++)
            {
                var candidate = GetSubSequence(pathStart, Position + 1).CreateString();

                var isValidPath = IOUtilities.PerformIO(() =>
                {
                    // Only the constructor's validation matters; the instance itself is not needed.
                    _ = new FileInfo(candidate);
                    return true;
                }, false);

                if (!isValidPath)
                {
                    break;
                }
            }

            if (Position == pathStart)
            {
                return Result<StackFrameToken>.Abort;
            }

            return CreateToken(StackFrameKind.PathToken, inTrivia.ToImmutableArray(), GetSubSequenceToCurrentPos(pathStart));
        }
 
        /// <summary>
        /// Scans a "line" keyword followed by digits, then swallows the rest of the text.
        /// </summary>
        /// <returns>
        /// A number token with the <see cref="StackFrameKind.LineTrivia"/> as leading trivia and any remaining
        /// text as trailing <see cref="StackFrameKind.SkippedTextTrivia"/>, or <see langword="null"/> when either
        /// the keyword or the digits are missing. The position is not rewound on failure.
        /// </returns>
        public StackFrameToken? TryScanRequiredLineNumber()
        {
            var lineTrivia = TryScanLineTrivia();
            if (lineTrivia is null)
            {
                return null;
            }

            if (TryScanNumbers() is not { } lineNumber)
            {
                return null;
            }

            // Everything after the digits is kept on the token as skipped text.
            var remainingTrivia = TryScanRemainingTrivia();

            return lineNumber.With(
                leadingTrivia: lineTrivia.ToImmutableArray(),
                trailingTrivia: remainingTrivia.ToImmutableArray());
        }
 
        /// <summary>
        /// Consumes a run of ASCII digits at the current position.
        /// </summary>
        /// <returns>
        /// A <see cref="StackFrameKind.NumberToken"/> covering the digits, or <see langword="null"/>
        /// when the current character is not a digit.
        /// </returns>
        public StackFrameToken? TryScanNumbers()
        {
            var digitsStart = Position;

            for (var ch = CurrentChar; IsNumber(ch); ch = CurrentChar)
            {
                Position++;
            }

            if (Position != digitsStart)
            {
                return CreateToken(StackFrameKind.NumberToken, GetSubSequenceToCurrentPos(digitsStart));
            }

            return null;
        }
 
        /// <summary>
        /// Scans the separator of a generated name: a single ASCII letter (the generated-name kind)
        /// immediately followed by two underscores, for example <c>g__</c>.
        /// </summary>
        /// <returns>
        /// A <see cref="StackFrameKind.GeneratedNameSeparatorToken"/> covering the three characters, or
        /// <c>Abort</c> when the letter or the underscores are missing. The position is not rewound on abort.
        /// </returns>
        public Result<StackFrameToken> TryScanRequiredGeneratedNameSeparator()
        {
            var separatorStart = Position;

            if (!IsAsciiAlphaCharacter(CurrentChar))
            {
                return Result<StackFrameToken>.Abort;
            }

            Position++;

            if (!IsStringAtPosition("__"))
            {
                return Result<StackFrameToken>.Abort;
            }

            Position += 2;

            return CreateToken(StackFrameKind.GeneratedNameSeparatorToken, GetSubSequenceToCurrentPos(separatorStart));
        }
 
        /// <summary>
        /// Scans the tail of a generated local-function name, which has the shape {numeric}_{numeric}.
        /// </summary>
        /// <returns>
        /// A <see cref="StackFrameKind.GeneratedNameSuffixToken"/> covering both numbers and the underscore,
        /// or <c>Abort</c> as soon as one of the three parts is missing. The position is not rewound on abort.
        /// </returns>
        public Result<StackFrameToken> TryScanRequiredGeneratedNameSuffix()
        {
            var suffixStart = Position;

            if (TryScanNumbers() is null)
            {
                return Result<StackFrameToken>.Abort;
            }

            if (!IsStringAtPosition("_"))
            {
                return Result<StackFrameToken>.Abort;
            }

            Position++;

            if (TryScanNumbers() is null)
            {
                return Result<StackFrameToken>.Abort;
            }

            return CreateToken(StackFrameKind.GeneratedNameSuffixToken, GetSubSequenceToCurrentPos(suffixStart));
        }
 
        /// <summary>
        /// Whether <paramref name="ch"/> is one of the blank characters: tab, line feed, form feed,
        /// carriage return or space.
        /// </summary>
        public static bool IsBlank(VirtualChar ch)
        {
            // List taken from the native regex parser.
            return ch.Value is '\u0009' or '\u000A' or '\u000C' or '\u000D' or ' ';
        }
 
        /// <summary>
        /// Whether <paramref name="ch"/> is an ASCII letter (a-z or A-Z).
        /// </summary>
        private static bool IsAsciiAlphaCharacter(VirtualChar ch)
        {
            var value = ch.Value;
            var isLower = value >= 'a' && value <= 'z';
            var isUpper = value >= 'A' && value <= 'Z';
            return isLower || isUpper;
        }
 
        /// <summary>
        /// Creates a token of the given kind with no trivia and no diagnostics.
        /// </summary>
        public static StackFrameToken CreateToken(StackFrameKind kind, VirtualCharSequence virtualChars)
        {
            var noTrivia = ImmutableArray<StackFrameTrivia>.Empty;
            return CreateToken(kind, noTrivia, virtualChars, noTrivia);
        }

        /// <summary>
        /// Creates a token of the given kind with leading trivia only and no diagnostics.
        /// </summary>
        public static StackFrameToken CreateToken(StackFrameKind kind, ImmutableArray<StackFrameTrivia> leadingTrivia, VirtualCharSequence virtualChars)
        {
            return CreateToken(kind, leadingTrivia, virtualChars, ImmutableArray<StackFrameTrivia>.Empty);
        }

        /// <summary>
        /// Creates a token of the given kind with both leading and trailing trivia and no diagnostics.
        /// </summary>
        public static StackFrameToken CreateToken(StackFrameKind kind, ImmutableArray<StackFrameTrivia> leadingTrivia, VirtualCharSequence virtualChars, ImmutableArray<StackFrameTrivia> trailingTrivia)
        {
            var noDiagnostics = ImmutableArray<EmbeddedDiagnostic>.Empty;
            return new StackFrameToken(kind, leadingTrivia, virtualChars, trailingTrivia, noDiagnostics, value: null!);
        }
 
        /// <summary>
        /// Creates trivia of the given kind with no diagnostics.
        /// </summary>
        private static StackFrameTrivia CreateTrivia(StackFrameKind kind, VirtualCharSequence virtualChars)
        {
            return CreateTrivia(kind, virtualChars, diagnostics: ImmutableArray<EmbeddedDiagnostic>.Empty);
        }

        /// <summary>
        /// Creates trivia of the given kind carrying the supplied diagnostics.
        /// </summary>
        private static StackFrameTrivia CreateTrivia(StackFrameKind kind, VirtualCharSequence virtualChars, ImmutableArray<EmbeddedDiagnostic> diagnostics)
        {
            // Empty trivia is not supported in StackFrames
            Debug.Assert(virtualChars.Length > 0);

            var trivia = new StackFrameTrivia(kind, virtualChars, diagnostics);
            return trivia;
        }
 
        /// <summary>
        /// Packs the non-null entries of <paramref name="triviaArray"/> into an immutable array,
        /// preserving their order.
        /// </summary>
        private static ImmutableArray<StackFrameTrivia> CreateTrivia(params StackFrameTrivia?[] triviaArray)
        {
            using var _ = ArrayBuilder<StackFrameTrivia>.GetInstance(out var present);

            for (var i = 0; i < triviaArray.Length; i++)
            {
                if (triviaArray[i] is { } trivia)
                {
                    present.Add(trivia);
                }
            }

            return present.ToImmutable();
        }
 
        /// <summary>
        /// Whether <paramref name="val"/> appears in <see cref="Text"/> starting at the current <see cref="Position"/>.
        /// </summary>
        private readonly bool IsStringAtPosition(string val)
        {
            return IsAtStartOfText(Position, val);
        }

        /// <summary>
        /// Whether <paramref name="val"/> appears in <see cref="Text"/> starting at <paramref name="position"/>.
        /// Running off the end of the text counts as a mismatch.
        /// </summary>
        private readonly bool IsAtStartOfText(int position, string val)
        {
            var offset = 0;
            while (offset < val.Length)
            {
                var index = position + offset;
                var matches = index < Text.Length && Text[index] == val[offset];
                if (!matches)
                {
                    return false;
                }

                offset++;
            }

            return true;
        }
 
        /// <summary>
        /// Consumes <paramref name="valueToLookFor"/> as trivia of kind <paramref name="triviaKind"/>
        /// when it appears at the current position.
        /// </summary>
        /// <returns>The trivia, or <see langword="null"/> (position unchanged) when the text does not match.</returns>
        private StackFrameTrivia? TryScanStringTrivia(string valueToLookFor, StackFrameKind triviaKind)
        {
            if (!IsStringAtPosition(valueToLookFor))
            {
                return null;
            }

            var triviaStart = Position;
            Position = triviaStart + valueToLookFor.Length;

            return CreateTrivia(triviaKind, GetSubSequenceToCurrentPos(triviaStart));
        }
 
        /// <summary>
        /// Consumes a run of blank characters (see <see cref="IsBlank"/>) at the current position.
        /// </summary>
        /// <returns>
        /// <see cref="StackFrameKind.WhitespaceTrivia"/> covering the run, or <see langword="null"/>
        /// when the current character is not blank.
        /// </returns>
        private StackFrameTrivia? TryScanWhiteSpace()
        {
            var whitespaceStart = Position;

            for (var ch = CurrentChar; IsBlank(ch); ch = CurrentChar)
            {
                Position++;
            }

            if (Position != whitespaceStart)
            {
                return CreateTrivia(StackFrameKind.WhitespaceTrivia, GetSubSequenceToCurrentPos(whitespaceStart));
            }

            return null;
        }
 
        /// <summary>
        /// Classifies a single character. Punctuation maps to its dedicated token kind; any other
        /// character is whitespace trivia, a number token, or skipped text, checked in that order.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="ch"/> is a line feed or a carriage return.
        /// </exception>
        private static StackFrameKind GetKind(VirtualChar ch)
        {
            switch (ch.Value)
            {
                case '\n':
                case '\r':
                    throw new InvalidOperationException();

                case '&': return StackFrameKind.AmpersandToken;
                case '[': return StackFrameKind.OpenBracketToken;
                case ']': return StackFrameKind.CloseBracketToken;
                case '(': return StackFrameKind.OpenParenToken;
                case ')': return StackFrameKind.CloseParenToken;
                case '.': return StackFrameKind.DotToken;
                case '+': return StackFrameKind.PlusToken;
                case ',': return StackFrameKind.CommaToken;
                case ':': return StackFrameKind.ColonToken;
                case '=': return StackFrameKind.EqualsToken;
                case '>': return StackFrameKind.GreaterThanToken;
                case '<': return StackFrameKind.LessThanToken;
                case '-': return StackFrameKind.MinusToken;
                case '\'': return StackFrameKind.SingleQuoteToken;
                case '`': return StackFrameKind.GraveAccentToken;
                case '\\': return StackFrameKind.BackslashToken;
                case '/': return StackFrameKind.ForwardSlashToken;
                case '$': return StackFrameKind.DollarToken;
                case '|': return StackFrameKind.PipeToken;
            }

            if (IsBlank(ch))
            {
                return StackFrameKind.WhitespaceTrivia;
            }

            if (IsNumber(ch))
            {
                return StackFrameKind.NumberToken;
            }

            return StackFrameKind.SkippedTextTrivia;
        }
 
        /// <summary>
        /// Whether <paramref name="ch"/> is an ASCII digit (0-9).
        /// </summary>
        private static bool IsNumber(VirtualChar ch)
        {
            var value = ch.Value;
            return value >= '0' && value <= '9';
        }
 
        /// <summary>
        /// The keyword strings for one language: <c>At</c> is matched by <see cref="TryScanAtTrivia"/>,
        /// <c>In</c> by <see cref="TryScanInTrivia"/> and <c>Line</c> by <see cref="TryScanLineTrivia"/>.
        /// </summary>
        private readonly record struct Language(string At, string In, string Line);
        // Keyword table tried in order by the TryScan*Trivia methods; the first entry whose string
        // matches at the current position wins. The trailing comment on each row is its language tag.
        // Leading and trailing spaces inside the strings are significant because matching is exact.
        // NOTE(review): these presumably mirror the localized text .NET emits in stack traces — confirm
        // against the runtime resources before editing any entry.
        private static readonly ImmutableArray<Language> s_languages = ImmutableArray.Create(
            new Language("at ", " in ", "line "), // en
            new Language("v ", " v ", "řádek "), // cs
            new Language("bei ", " in ", "Zeile "), // de
            new Language("en ", " en ", "línea "), // es
            new Language("à ", " dans ", "ligne "), // fr
            new Language("in ", " in ", "riga "), // it
            new Language("場所 ", " 場所 ", "行 "), // ja
            new Language("위치: ", " 파일 ", "줄 "), // ko
            new Language("w ", " w ", "wiersz "), // pl
            new Language("em ", " na ", "linha "), // pt-BR
            new Language("в ", " в ", "строка "), // ru
            new Language("在 ", " 位置 ", "行号 "), // zh-Hans
            new Language("於 ", " 於 ", " 行 ") // zh-Hant
            );
    }
}