File: IVirtualCharService.cs
Web Access
Project: ..\..\..\src\CodeStyle\Core\Analyzers\Microsoft.CodeAnalysis.CodeStyle.csproj (Microsoft.CodeAnalysis.CodeStyle)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using Microsoft.CodeAnalysis.Text;
 
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars
{
    /// <summary>
    /// Helper service that takes the raw text of a string token and produces the individual
    /// characters that raw string token represents (i.e. with escapes collapsed).  The difference
    /// between this and the result from token.ValueText is that for each collapsed character
    /// returned the original span of text in the original token can be found.  i.e. if you had the
    /// following in C#:
    ///
    /// "G\u006fo"
    ///
    /// Then you'd get back (spans are half-open and relative to the text between the quotes):
    ///
    /// 'G' -> [0, 1) 'o' -> [1, 7) 'o' -> [7, 8)
    ///
    /// This allows for embedded language processing that can refer back to the users' original code
    /// instead of the escaped value we're processing.
    /// </summary>
    internal interface IVirtualCharService
    {
        /// <summary>
        /// <para>
        /// Takes in a string token and returns the <see cref="VirtualChar"/>s corresponding to each
        /// char of the token's <see cref="SyntaxToken.ValueText"/>.  In other words, for each char
        /// in ValueText there will be a VirtualChar in the resultant array.  Each VirtualChar will
        /// specify what char the language considers them to represent, as well as the span of text
        /// in the original <see cref="SourceText"/> that the language created that char from.
        /// </para>
        /// <para>
        /// For most chars this will be a single character span.  i.e. 'c' -> 'c'.  However, for
        /// escapes this may be a multi character span.  i.e. 'c' -> '\u0063'
        /// </para>
        /// <para>
        /// If the token is not a string literal token, or the string literal has any diagnostics on
        /// it, then <see langword="default"/> will be returned.  Additionally, because a
        /// VirtualChar can only represent a single char, while some escape sequences represent
        /// multiple chars, <see langword="default"/> will also be returned in those cases.  All
        /// these cases could be relaxed in the future.  But they greatly simplify the
        /// implementation.
        /// </para>
        /// <para>
        /// If this function succeeds, certain invariants will hold.  First, each character in the
        /// sequence of characters in <paramref name="token"/>.ValueText will become a single
        /// VirtualChar in the result array with a matching <see cref="VirtualChar.Rune"/> property.
        /// Similarly, the <see cref="VirtualChar.Span"/>s of consecutive VirtualChars will abut
        /// each other, and the union of all of them will cover the span of the token's <see
        /// cref="SyntaxToken.Text"/> *not* including the start and end quotes.
        /// </para>
        /// <para>
        /// In essence the VirtualChar array acts as the information explaining how the <see
        /// cref="SyntaxToken.Text"/> of the token between the quotes maps to each character in the
        /// token's <see cref="SyntaxToken.ValueText"/>.
        /// </para>
        /// </summary>
        /// <param name="token">The token to convert; expected to be a string literal token without
        /// diagnostics, otherwise <see langword="default"/> is returned.</param>
        /// <returns>The sequence of <see cref="VirtualChar"/>s for <paramref name="token"/>, or
        /// <see langword="default"/> in the unsupported cases described above.</returns>
        VirtualCharSequence TryConvertToVirtualChars(SyntaxToken token);

        /// <summary>
        /// Produces the appropriate escape version of <paramref name="ch"/> to be placed in a
        /// normal string literal.  For example if <paramref name="ch"/> is the <c>tab</c>
        /// character, then this would produce <c>t</c> as <c>\t</c> is what would go into a string
        /// literal.
        /// </summary>
        /// <param name="ch">The character to find an escape form for.</param>
        /// <param name="escapeChar">Receives the character that follows the backslash in the escape
        /// sequence (e.g. <c>t</c> for the tab character).</param>
        /// <returns>NOTE(review): presumably <see langword="true"/> when <paramref name="ch"/> has
        /// such an escape form and <paramref name="escapeChar"/> was set, and <see
        /// langword="false"/> otherwise -- confirm against the implementations.</returns>
        bool TryGetEscapeCharacter(VirtualChar ch, out char escapeChar);
    }
}