File: StringBreaker.cs
Web Access
Project: ..\..\..\src\Workspaces\Core\Portable\Microsoft.CodeAnalysis.Workspaces.csproj (Microsoft.CodeAnalysis.Workspaces)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System.Diagnostics;
using Microsoft.CodeAnalysis.Shared.Collections;
using Microsoft.CodeAnalysis.Text;
 
namespace Microsoft.CodeAnalysis.Shared.Utilities
{
    /// <summary>
    /// Splits identifier-like text into <see cref="TextSpan"/> pieces: capitalized words,
    /// lower-case runs, digit runs and individual underscores.
    /// </summary>
    internal static class StringBreaker
    {
        /// <summary>
        /// Breaks an identifier into word parts and appends them to <paramref name="parts"/>.
        /// A run of capitals is kept together, so <c>XMLDocument</c> yields <c>XML</c> and
        /// <c>Document</c>.
        /// </summary>
        /// <param name="identifier">The text to break apart.</param>
        /// <param name="parts">The collection that receives the spans, in order.</param>
        public static void AddWordParts(string identifier, ref TemporaryArray<TextSpan> parts)
        {
            AddParts(identifier, word: true, ref parts);
        }

        /// <summary>
        /// Breaks an identifier into character parts and appends them to <paramref name="parts"/>.
        /// Each capital that is not followed by a lower-case letter stands alone, so
        /// <c>XMLDocument</c> yields <c>X</c>, <c>M</c>, <c>L</c> and <c>Document</c>.
        /// </summary>
        /// <param name="identifier">The text to break apart.</param>
        /// <param name="parts">The collection that receives the spans, in order.</param>
        public static void AddCharacterParts(string identifier, ref TemporaryArray<TextSpan> parts)
        {
            AddParts(identifier, word: false, ref parts);
        }

        /// <summary>
        /// Repeatedly calls <see cref="GenerateSpan"/> from the end of the previous span and
        /// appends every non-empty result to <paramref name="parts"/>. Processing stops at the
        /// first empty span, so nothing after that point is added.
        /// </summary>
        /// <param name="text">The text to break apart.</param>
        /// <param name="word"><see langword="true"/> for word breaking, <see langword="false"/> for character breaking.</param>
        /// <param name="parts">The collection that receives the spans, in order.</param>
        public static void AddParts(string text, bool word, ref TemporaryArray<TextSpan> parts)
        {
            var position = 0;
            while (position < text.Length)
            {
                var next = GenerateSpan(text, position, word);
                if (next.IsEmpty)
                {
                    // Nothing more can be produced from here on.
                    return;
                }

                Debug.Assert(next.Start >= position, "Bad generator.");

                parts.Add(next);
                position = next.End;
            }
        }

        /// <summary>
        /// Produces the span of the next part located at or after <paramref name="wordStart"/>.
        /// Leading punctuation other than <c>'_'</c> is skipped first.
        /// </summary>
        /// <param name="identifier">The text being broken apart.</param>
        /// <param name="wordStart">The index at which to start looking.</param>
        /// <param name="word"><see langword="true"/> for word breaking, <see langword="false"/> for character breaking.</param>
        /// <returns>
        /// The span of the next part, or the default (empty) span when the end of the text is
        /// reached or the next character is not an upper-case letter, a lower-case letter,
        /// an underscore or a digit.
        /// </returns>
        public static TextSpan GenerateSpan(string identifier, int wordStart, bool word)
        {
            var length = identifier.Length;
            wordStart = SkipPunctuation(identifier, length, wordStart);
            if (wordStart >= length)
            {
                return default;
            }

            var firstChar = identifier[wordStart];
            if (char.IsUpper(firstChar))
            {
                // A capital in the very last position has no following character to inspect.
                if (wordStart + 1 == length)
                {
                    return new TextSpan(wordStart, 1);
                }

                return word
                    ? ScanWordRun(identifier, length, wordStart)
                    : ScanCharacterRun(identifier, length, wordStart);
            }

            if (IsLower(firstChar))
            {
                return ScanLowerCaseRun(identifier, length, wordStart);
            }

            if (firstChar == '_')
            {
                // Each underscore is its own one-character part.
                return new TextSpan(wordStart, 1);
            }

            if (char.IsDigit(firstChar))
            {
                return ScanNumber(identifier, length, wordStart);
            }

            return default;
        }

        /// <summary>
        /// Character-mode scan starting at a capital that is not the last character.
        /// A capital followed by a lower-case letter starts a word such as <c>Document</c>;
        /// any other capital is returned on its own.
        /// </summary>
        private static TextSpan ScanCharacterRun(string identifier, int length, int wordStart)
        {
            var next = wordStart + 1;
            Debug.Assert(next < length);

            // "Do": the capital begins a word, so take the lower-case letters that follow it.
            // Otherwise the capital is a part by itself.
            return IsLower(identifier[next])
                ? ScanLowerCaseRun(identifier, length, wordStart)
                : new TextSpan(wordStart, 1);
        }

        /// <summary>
        /// Word-mode scan starting at a capital that is not the last character.
        /// Consecutive capitals are grouped, except that the final capital is left for the
        /// next part when a lower-case letter follows it (<c>XMLDocument</c> gives <c>XML</c>).
        /// </summary>
        private static TextSpan ScanWordRun(string identifier, int length, int wordStart)
        {
            var second = wordStart + 1;
            Debug.Assert(second < length);
            var secondChar = identifier[second];

            if (!char.IsUpper(secondChar))
            {
                // "Do": a capital followed by lower-case letters is one word.
                // Anything else after the capital leaves the capital alone.
                return IsLower(secondChar)
                    ? ScanLowerCaseRun(identifier, length, wordStart)
                    : new TextSpan(wordStart, 1);
            }

            // "XM": at least two capitals in a row; find where the capitals stop.
            var runEnd = second + 1;
            while (runEnd < length && char.IsUpper(identifier[runEnd]))
            {
                runEnd++;
            }

            if (runEnd < length && IsLower(identifier[runEnd]))
            {
                // Stopped at the 'o' of "XMLDo": the last capital belongs to the next word,
                // so exclude it and return "XML".
                Debug.Assert(char.IsUpper(identifier[runEnd - 1]));
                return new TextSpan(wordStart, runEnd - 1 - wordStart);
            }

            // Stopped at something else (punctuation, end of text, ...): keep every capital.
            return new TextSpan(wordStart, runEnd - wordStart);
        }

        /// <summary>
        /// Returns the span that starts at <paramref name="wordStart"/> and extends over all
        /// lower-case letters that immediately follow that first character.
        /// </summary>
        private static TextSpan ScanLowerCaseRun(string identifier, int length, int wordStart)
        {
            // The first character is always included, whatever it is.
            var end = wordStart;
            do
            {
                end++;
            }
            while (end < length && IsLower(identifier[end]));

            return new TextSpan(wordStart, end - wordStart);
        }

        /// <summary>
        /// Returns the span that starts at <paramref name="wordStart"/> and extends over all
        /// digits that immediately follow that first character.
        /// </summary>
        private static TextSpan ScanNumber(string identifier, int length, int wordStart)
        {
            // The first character is always included; the caller has already seen a digit there.
            var end = wordStart;
            do
            {
                end++;
            }
            while (end < length && char.IsDigit(identifier[end]));

            return TextSpan.FromBounds(wordStart, end);
        }

        /// <summary>
        /// Advances past punctuation characters starting at <paramref name="wordStart"/> and
        /// returns the index of the first character that should not be skipped. An underscore
        /// is never skipped.
        /// </summary>
        private static int SkipPunctuation(string identifier, int length, int wordStart)
        {
            var index = wordStart;
            while (index < length)
            {
                var ch = identifier[index];
                if (ch == '_' || !char.IsPunctuation(ch))
                {
                    break;
                }

                index++;
            }

            return index;
        }

        /// <summary>
        /// Lower-case test that uses a direct range comparison for ASCII characters and
        /// <see cref="char.IsLower(char)"/> for everything else.
        /// </summary>
        private static bool IsLower(char c)
        {
            if (!IsAscii(c))
            {
                return char.IsLower(c);
            }

            return c is >= 'a' and <= 'z';
        }

        /// <summary>
        /// Returns <see langword="true"/> when the character's code is below 0x80.
        /// </summary>
        private static bool IsAscii(char v)
        {
            return v <= 0x7F;
        }
    }
}