File: Text\SourceTextTests.cs
Web Access
Project: ..\..\..\src\Compilers\Core\CodeAnalysisTest\Microsoft.CodeAnalysis.UnitTests.csproj (Microsoft.CodeAnalysis.UnitTests)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
#nullable disable
 
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.IO;
using System.Linq;
using System.Text;
using Microsoft.CodeAnalysis.Text;
using Roslyn.Test.Utilities;
using Xunit;
 
namespace Microsoft.CodeAnalysis.UnitTests.Text
{
    public class SourceTextTests
    {
        // Shared encoding instances; several tests compare against them by reference (Assert.Same).
        private static readonly Encoding s_utf8 = Encoding.UTF8;
        private static readonly Encoding s_unicode = Encoding.Unicode;
        // Sample text used as input by several tests in this class.
        private const string HelloWorld = "Hello, World!";
 
        // An empty string, a zero-length byte buffer and an empty stream must each produce an empty text.
        [Fact]
        public void Empty()
        {
            var fromString = SourceText.From(string.Empty);
            TestIsEmpty(fromString);

            var fromBytes = SourceText.From(Array.Empty<byte>(), 0);
            TestIsEmpty(fromBytes);

            var fromStream = SourceText.From(new MemoryStream());
            TestIsEmpty(fromStream);
        }
 
        /// <summary>
        /// Asserts that <paramref name="text"/> has zero length, renders as <see cref="string.Empty"/>,
        /// and consists of exactly one line whose span is empty.
        /// </summary>
        private static void TestIsEmpty(SourceText text)
        {
            Assert.Equal(0, text.Length);
            Assert.Same(string.Empty, text.ToString());

            var lines = text.Lines;
            Assert.Equal(1, lines.Count);

            var onlyLine = lines[0];
            Assert.Equal(0, onlyLine.Span.Length);
        }
 
        [Fact]
        public void Encoding1()
        {
            // Encoding expected below whenever null is passed for these BOM-less inputs.
            var expectedDefault = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);

            // From a string: the supplied encoding instance is reported back unchanged.
            var utf8Text = SourceText.From(HelloWorld, s_utf8);
            Assert.Same(s_utf8, utf8Text.Encoding);

            var unicodeText = SourceText.From(HelloWorld, s_unicode);
            Assert.Same(s_unicode, unicodeText.Encoding);

            // From a byte buffer.
            var encoded = s_unicode.GetBytes(HelloWorld);

            var bytesExplicit = SourceText.From(encoded, encoded.Length, s_unicode);
            Assert.Same(s_unicode, bytesExplicit.Encoding);

            var bytesDefault = SourceText.From(encoded, encoded.Length, null);
            Assert.Equal(expectedDefault, bytesDefault.Encoding);

            // From a stream over the same bytes; the one stream instance is used for both calls.
            var input = new MemoryStream(encoded);

            var streamExplicit = SourceText.From(input, s_unicode);
            Assert.Same(s_unicode, streamExplicit.Encoding);

            var streamDefault = SourceText.From(input, null);
            Assert.Equal(expectedDefault, streamDefault.Encoding);
        }
 
        [Fact]
        public void EncodingBOM()
        {
            var utf8WithBOM = new UTF8Encoding(encoderShouldEmitUTF8Identifier: true);

            // Input is the UTF-8 preamble followed by the encoded text "abc".
            var preamble = utf8WithBOM.GetPreamble();
            var payload = utf8WithBOM.GetBytes("abc");
            var bytes = preamble.Concat(payload).ToArray();

            // The BOM's encoding is expected both when a different encoding is passed and when none is.
            var bytesWithOtherEncoding = SourceText.From(bytes, bytes.Length, s_unicode);
            Assert.Equal(utf8WithBOM, bytesWithOtherEncoding.Encoding);

            var bytesWithoutEncoding = SourceText.From(bytes, bytes.Length, null);
            Assert.Equal(utf8WithBOM, bytesWithoutEncoding.Encoding);

            // Same expectations when reading through a stream.
            var input = new MemoryStream(bytes);

            var streamWithOtherEncoding = SourceText.From(input, s_unicode);
            Assert.Equal(utf8WithBOM, streamWithOtherEncoding.Encoding);

            var streamWithoutEncoding = SourceText.From(input, null);
            Assert.Equal(utf8WithBOM, streamWithoutEncoding.Encoding);
        }
 
        // When no checksum algorithm is specified, SHA-1 is expected for string, byte[] and stream inputs.
        [Fact]
        public void ChecksumAlgorithm_Default()
        {
            var fromString = SourceText.From(HelloWorld);
            Assert.Equal(SourceHashAlgorithm.Sha1, fromString.ChecksumAlgorithm);

            var encoded = s_unicode.GetBytes(HelloWorld);
            var fromBytes = SourceText.From(encoded, encoded.Length);
            Assert.Equal(SourceHashAlgorithm.Sha1, fromBytes.ChecksumAlgorithm);

            var input = new MemoryStream(encoded);
            var fromStream = SourceText.From(input);
            Assert.Equal(SourceHashAlgorithm.Sha1, fromStream.ChecksumAlgorithm);
        }
 
        // An explicitly requested checksum algorithm must be reported back for string, byte[] and stream inputs.
        [Theory]
        [InlineData(SourceHashAlgorithm.Sha1)]
        [InlineData(SourceHashAlgorithm.Sha256)]
        public void ChecksumAlgorithm1(SourceHashAlgorithm algorithm)
        {
            var fromString = SourceText.From(HelloWorld, checksumAlgorithm: algorithm);
            Assert.Equal(algorithm, fromString.ChecksumAlgorithm);

            var encoded = s_unicode.GetBytes(HelloWorld);
            var fromBytes = SourceText.From(encoded, encoded.Length, checksumAlgorithm: algorithm);
            Assert.Equal(algorithm, fromBytes.ChecksumAlgorithm);

            var input = new MemoryStream(encoded);
            var fromStream = SourceText.From(input, checksumAlgorithm: algorithm);
            Assert.Equal(algorithm, fromStream.ChecksumAlgorithm);
        }
 
        [WorkItem(7225, "https://github.com/dotnet/roslyn/issues/7225")]
        [Fact]
        public void ChecksumAndBOM()
        {
            const string source = "Hello, World!";
            var sha1 = SourceHashAlgorithm.Sha1;
            var noBOM = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
            var withBOM = new UTF8Encoding(encoderShouldEmitUTF8Identifier: true);

            // Expected checksums for the content without and with a leading UTF-8 BOM.
            var checksumNoBOM = ImmutableArray.Create<byte>(
                0x0a, 0x0a, 0x9f, 0x2a, 0x67, 0x72, 0x94, 0x25, 0x57, 0xab,
                0x53, 0x55, 0xd7, 0x6a, 0xf4, 0x42, 0xf8, 0xf6, 0x5e, 0x01);
            var checksumBOM = ImmutableArray.Create<byte>(
                0xb2, 0x19, 0x00, 0x9b, 0x61, 0xce, 0xcd, 0x50, 0x7b, 0x2e,
                0x56, 0x3c, 0xc0, 0xeb, 0x96, 0xe2, 0xa1, 0xd9, 0x3f, 0xfc);

            // Small factories so each assertion below only states the input and the encoding under test.
            SourceText FromBytes(byte[] buffer, Encoding encoding) =>
                SourceText.From(buffer, buffer.Length, encoding, sha1);
            SourceText FromStream(Stream stream, Encoding encoding) =>
                SourceText.From(stream, encoding, sha1);
            SourceText DecodeLarge(Stream stream, Encoding encoding) =>
                LargeText.Decode(stream, encoding, sha1, throwIfBinaryDetected: false, canBeEmbedded: false);

            // SourceText from string. Checksum should include BOM from explicit encoding.
            VerifyChecksum(SourceText.From(source, noBOM, sha1), checksumNoBOM);
            VerifyChecksum(SourceText.From(source, withBOM, sha1), checksumBOM);

            // The text as raw bytes, without and with the three-byte UTF-8 BOM (EF BB BF) in front.
            var bytesNoBOM = new byte[]
            {
                0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2c, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x21
            };
            var bytesBOM = new byte[]
            {
                0xef, 0xbb, 0xbf,
                0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2c, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x21
            };

            var streamNoBOM = new MemoryStream(bytesNoBOM);
            var streamBOM = new MemoryStream(bytesBOM);

            // SourceText from bytes no BOM. Checksum should ignore explicit encoding.
            VerifyChecksum(FromBytes(bytesNoBOM, null), checksumNoBOM);
            VerifyChecksum(FromBytes(bytesNoBOM, noBOM), checksumNoBOM);
            VerifyChecksum(FromBytes(bytesNoBOM, withBOM), checksumNoBOM);

            // SourceText from bytes with BOM. Checksum should include BOM.
            VerifyChecksum(FromBytes(bytesBOM, null), checksumBOM);
            VerifyChecksum(FromBytes(bytesBOM, noBOM), checksumBOM);
            VerifyChecksum(FromBytes(bytesBOM, withBOM), checksumBOM);

            // SourceText from stream no BOM. Checksum should ignore explicit encoding.
            VerifyChecksum(FromStream(streamNoBOM, null), checksumNoBOM);
            VerifyChecksum(FromStream(streamNoBOM, noBOM), checksumNoBOM);
            VerifyChecksum(FromStream(streamNoBOM, withBOM), checksumNoBOM);

            // SourceText from stream with BOM. Checksum should include BOM.
            VerifyChecksum(FromStream(streamBOM, null), checksumBOM);
            VerifyChecksum(FromStream(streamBOM, noBOM), checksumBOM);
            VerifyChecksum(FromStream(streamBOM, withBOM), checksumBOM);

            // LargeText from stream no BOM. Checksum should ignore explicit encoding.
            VerifyChecksum(DecodeLarge(streamNoBOM, noBOM), checksumNoBOM);
            VerifyChecksum(DecodeLarge(streamNoBOM, withBOM), checksumNoBOM);

            // LargeText from stream with BOM. Checksum should include BOM.
            VerifyChecksum(DecodeLarge(streamBOM, noBOM), checksumBOM);
            VerifyChecksum(DecodeLarge(streamBOM, withBOM), checksumBOM);

            // LargeText from writer no BOM. Checksum includes BOM
            // from explicit encoding. This is inconsistent with the
            // LargeText cases above but LargeTextWriter is only used
            // for unsaved edits where the checksum is ignored.
            VerifyChecksum(FromLargeTextWriter(source, noBOM, sha1), checksumNoBOM);
            VerifyChecksum(FromLargeTextWriter(source, withBOM, sha1), checksumBOM);

            // SourceText from string with changes. Checksum includes BOM from explicit encoding.
            VerifyChecksum(FromChanges(SourceText.From(source, noBOM, sha1)), checksumNoBOM);
            VerifyChecksum(FromChanges(SourceText.From(source, withBOM, sha1)), checksumBOM);

            // SourceText from stream with changes, no BOM. Checksum includes BOM
            // from explicit encoding. This is inconsistent with the SourceText cases but
            // "with changes" is only used for unsaved edits where the checksum is ignored.
            VerifyChecksum(FromChanges(FromStream(streamNoBOM, noBOM)), checksumNoBOM);
            VerifyChecksum(FromChanges(FromStream(streamNoBOM, withBOM)), checksumBOM);

            // SourceText from stream with changes, with BOM. Checksum includes BOM.
            VerifyChecksum(FromChanges(FromStream(streamBOM, noBOM)), checksumBOM);
            VerifyChecksum(FromChanges(FromStream(streamBOM, withBOM)), checksumBOM);
        }
 
        /// <summary>
        /// Writes <paramref name="source"/> into a <see cref="LargeTextWriter"/> created with the given
        /// encoding and checksum algorithm (sized to the source length) and returns the resulting text.
        /// </summary>
        private static SourceText FromLargeTextWriter(string source, Encoding encoding, SourceHashAlgorithm checksumAlgorithm)
        {
            SourceText result;

            using (var largeWriter = new LargeTextWriter(encoding, checksumAlgorithm, source.Length))
            {
                largeWriter.Write(source);
                result = largeWriter.ToSourceText();
            }

            return result;
        }
 
        /// <summary>
        /// Applies a change that replaces the first character of <paramref name="text"/> with that same
        /// character, asserts the result is not equal to the input, and returns the changed text.
        /// </summary>
        private static SourceText FromChanges(SourceText text)
        {
            var firstChar = new TextSpan(0, 1);

            // The replacement text is read back from the span it replaces.
            var sameContent = new TextChange(firstChar, text.ToString(firstChar));

            var result = text.WithChanges(sameContent);
            Assert.NotEqual(text, result);
            return result;
        }
 
        /// <summary>
        /// Asserts that the checksum reported by <paramref name="text"/> equals
        /// <paramref name="expectedChecksum"/> byte for byte.
        /// </summary>
        private static void VerifyChecksum(SourceText text, ImmutableArray<byte> expectedChecksum)
        {
            Assert.Equal<byte>(expectedChecksum, text.GetChecksum());
        }
 
        [Fact]
        public void ContentEquals()
        {
            var fromString = SourceText.From(HelloWorld, s_utf8);

            // String-backed texts: equal for the same content, unequal once a character is appended.
            var sameContent = SourceText.From(HelloWorld, s_utf8);
            Assert.True(fromString.ContentEquals(sameContent));

            var longerContent = SourceText.From(HelloWorld + "o", s_utf8);
            Assert.False(fromString.ContentEquals(longerContent));

            Assert.True(SourceText.From(HelloWorld, s_utf8).ContentEquals(SourceText.From(HelloWorld, s_utf8)));

            // The same characters decoded from UTF-16 and UTF-8 byte streams.
            var unicodeStream = new MemoryStream(s_unicode.GetBytes(HelloWorld));
            var fromUnicode = EncodedStringText.Create(unicodeStream, s_unicode);
            var utf8Stream = new MemoryStream(s_utf8.GetBytes(HelloWorld));
            var fromUtf8 = EncodedStringText.Create(utf8Stream, s_utf8);

            // Equality is expected with itself and with the string-backed text, in both directions.
            Assert.True(fromUnicode.ContentEquals(fromUnicode));
            Assert.True(fromString.ContentEquals(fromUnicode));
            Assert.True(fromUnicode.ContentEquals(fromString));

            // Equality is also expected across the two source encodings, in both directions.
            Assert.True(fromUtf8.ContentEquals(fromUtf8));
            Assert.True(fromUnicode.ContentEquals(fromUtf8));
            Assert.True(fromUtf8.ContentEquals(fromUnicode));
        }
 
        [Fact]
        public void IsBinary()
        {
            // Empty text is not binary.
            Assert.False(SourceText.IsBinary(""));

            // Isolated NUL characters, wherever they occur, are not treated as binary.
            Assert.False(SourceText.IsBinary("\0abc"));
            Assert.False(SourceText.IsBinary("a\0bc"));
            Assert.False(SourceText.IsBinary("abc\0"));
            Assert.False(SourceText.IsBinary("a\0b\0c"));

            // Two consecutive NUL characters are treated as binary.
            Assert.True(SourceText.IsBinary("\0\0abc"));
            Assert.True(SourceText.IsBinary("a\0\0bc"));
            Assert.True(SourceText.IsBinary("abc\0\0"));

            var utf8 = Encoding.UTF8;

            // Text decoded from these high-bit bytes is not expected to be flagged.
            var decodedHighBytes = utf8.GetString(new byte[] { 0x81, 0x8D, 0x8F, 0x90, 0x9D });
            Assert.False(SourceText.IsBinary(decodedHighBytes));

            // Unicode string: äëïöüû
            Assert.False(SourceText.IsBinary("abc def baz aeiouy \u00E4\u00EB\u00EF\u00F6\u00FC\u00FB"));

            // The System test-metadata resource decoded as UTF-8 is expected to be flagged.
            var decodedResource = utf8.GetString(TestMetadata.ResourcesNet451.System);
            Assert.True(SourceText.IsBinary(decodedResource));
        }
 
        // With throwIfBinaryDetected set, creating a text from the System test-metadata resource
        // must throw InvalidDataException for both the byte[] and the stream overload.
        [Fact]
        public void FromThrowsIfBinary()
        {
            var resourceBytes = TestMetadata.ResourcesNet451.System;
            Assert.Throws<InvalidDataException>(
                () => SourceText.From(resourceBytes, resourceBytes.Length, throwIfBinaryDetected: true));

            var resourceStream = new MemoryStream(resourceBytes);
            Assert.Throws<InvalidDataException>(
                () => SourceText.From(resourceStream, throwIfBinaryDetected: true));
        }
 
        // A text created from a TextReader must have the same checksum as one created from the
        // equivalent string, and must report the encoding that was passed in (null when none is).
        [Fact]
        public void FromTextReader()
        {
            var expected = "Text reader source text test";
            var expectedSourceText = SourceText.From(expected);

            var actualSourceText = SourceText.From(new StringReader(expected), expected.Length);

            Assert.Equal<byte>(expectedSourceText.GetChecksum(), actualSourceText.GetChecksum());

            // Each call gets its own reader: a StringReader is forward-only, so reusing one that an
            // earlier call already consumed would run the remaining assertions against empty content.
            Assert.Same(s_utf8, SourceText.From(new StringReader(expected), expected.Length, s_utf8).Encoding);
            Assert.Same(s_unicode, SourceText.From(new StringReader(expected), expected.Length, s_unicode).Encoding);
            Assert.Null(SourceText.From(new StringReader(expected), expected.Length, null).Encoding);
        }
 
        // Same checks as the small TextReader case, for input of SourceText.LargeObjectHeapLimitInChars
        // characters, where the result is expected to be a LargeText.
        [Fact]
        public void FromTextReader_Large()
        {
            var expected = new string('l', SourceText.LargeObjectHeapLimitInChars);
            var expectedSourceText = SourceText.From(expected);

            var actualSourceText = SourceText.From(new StringReader(expected), expected.Length);

            Assert.IsType<LargeText>(actualSourceText);
            Assert.Equal<byte>(expectedSourceText.GetChecksum(), actualSourceText.GetChecksum());

            // Each call gets its own reader: a StringReader is forward-only, so reusing one that an
            // earlier call already consumed would run the remaining assertions against empty content.
            Assert.Same(s_utf8, SourceText.From(new StringReader(expected), expected.Length, s_utf8).Encoding);
            Assert.Same(s_unicode, SourceText.From(new StringReader(expected), expected.Length, s_unicode).Encoding);
            Assert.Null(SourceText.From(new StringReader(expected), expected.Length, null).Encoding);
        }
 
        /// <summary>
        /// Convenience overload that treats the whole of <paramref name="data"/> as valid
        /// (a null array counts as length zero).
        /// </summary>
        private static void TestTryReadByteOrderMark(Encoding expectedEncoding, int expectedPreambleLength, byte[] data)
        {
            var validLength = data?.Length ?? 0;
            TestTryReadByteOrderMark(expectedEncoding, expectedPreambleLength, data, validLength);
        }
 
        /// <summary>
        /// Calls <c>SourceText.TryReadByteOrderMark</c> on the first <paramref name="validLength"/> bytes of
        /// <paramref name="data"/> and asserts both the returned encoding and the reported preamble length.
        /// </summary>
        private static void TestTryReadByteOrderMark(Encoding expectedEncoding, int expectedPreambleLength, byte[] data, int validLength)
        {
            Encoding actualEncoding = SourceText.TryReadByteOrderMark(data, validLength, out int actualPreambleLength);

            if (expectedEncoding != null)
            {
                Assert.Equal(expectedEncoding, actualEncoding);
            }
            else
            {
                // No byte order mark expected, so no encoding should be returned.
                Assert.Null(actualEncoding);
            }

            Assert.Equal(expectedPreambleLength, actualPreambleLength);
        }
 
        [Fact]
        public void TryReadByteOrderMark()
        {
            var utf8Bom = new byte[] { 0xef, 0xbb, 0xbf };
            var utf16LittleEndianBom = new byte[] { 0xff, 0xfe };
            var utf16BigEndianBom = new byte[] { 0xfe, 0xff };

            // UTF-8: empty input and incomplete prefixes of the three-byte mark yield nothing,
            // including a complete mark of which only two bytes are declared valid.
            TestTryReadByteOrderMark(null, 0, Array.Empty<byte>());
            TestTryReadByteOrderMark(null, 0, new byte[] { 0xef });
            TestTryReadByteOrderMark(null, 0, new byte[] { 0xef, 0xbb });
            TestTryReadByteOrderMark(null, 0, utf8Bom, validLength: 2);
            TestTryReadByteOrderMark(Encoding.UTF8, 3, utf8Bom);

            // UTF-16 little endian: a lone first byte yields nothing.
            TestTryReadByteOrderMark(null, 0, new byte[] { 0xff });
            TestTryReadByteOrderMark(Encoding.Unicode, 2, utf16LittleEndianBom);

            // UTF-16 big endian: a lone first byte yields nothing.
            TestTryReadByteOrderMark(null, 0, new byte[] { 0xfe });
            TestTryReadByteOrderMark(Encoding.BigEndianUnicode, 2, utf16BigEndianBom);
        }
 
        // Writing the span from index 1 to the end must emit everything except the first character.
        [Fact]
        [WorkItem(41903, "https://github.com/dotnet/roslyn/issues/41903")]
        public void WriteWithRangeStartingLaterThanZero()
        {
            var alphabet = SourceText.From("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
            var output = new StringWriter();

            var allButFirst = TextSpan.FromBounds(1, alphabet.Length);
            alphabet.Write(output, allButFirst);

            Assert.Equal("BCDEFGHIJKLMNOPQRSTUVWXYZ", output.ToString());
        }
 
        /// <summary>
        /// Theory data: every span that lies within a text of <paramref name="totalLength"/> characters,
        /// including spans that end exactly at the end of the text and the empty span positioned at the end.
        /// </summary>
        /// <param name="totalLength">Length of the text the spans must fit into.</param>
        /// <returns>One single-element argument list per span.</returns>
        public static IEnumerable<object[]> AllRanges(int totalLength) =>
            // Enumerable.Range takes (start, count), so its upper bound is exclusive; the "+ 1" makes
            // both bounds inclusive so that start + length can reach totalLength. Without it no
            // generated span ever touches the end of the text (the full-text span included).
            from start in Enumerable.Range(0, totalLength + 1)
            from length in Enumerable.Range(0, totalLength - start + 1)
            select new object[] { new TextSpan(start, length) };
 
        // For each generated span, Write must emit exactly the matching substring of the text.
        [Theory]
        [MemberData(nameof(AllRanges), 10)]
        [WorkItem(41903, "https://github.com/dotnet/roslyn/issues/41903")]
        public void WriteWithAllRanges(TextSpan span)
        {
            // Ten characters, matching the length passed to AllRanges above.
            const string Text = "0123456789";

            var output = new StringWriter();
            SourceText.From(Text).Write(output, span);
            var written = output.ToString();

            var expected = Text.Substring(span.Start, span.Length);
            Assert.Equal(expected, written);
        }
 
        // A span that starts past the end of the three-character text must be rejected,
        // with the exception naming the "span" parameter.
        [Fact]
        public void WriteWithSpanStartingAfterEndThrowsOutOfRange()
        {
            Action write = () =>
                SourceText.From("ABC").Write(TextWriter.Null, TextSpan.FromBounds(4, 4));

            var exception = Assert.ThrowsAny<ArgumentOutOfRangeException>(write);

            Assert.Equal("span", exception.ParamName);
        }
 
        // A span that starts inside the three-character text but ends past it must be rejected,
        // with the exception naming the "span" parameter.
        [Fact]
        public void WriteWithSpanEndingAfterEndThrowsOutOfRange()
        {
            Action write = () =>
                SourceText.From("ABC").Write(TextWriter.Null, TextSpan.FromBounds(2, 4));

            var exception = Assert.ThrowsAny<ArgumentOutOfRangeException>(write);

            Assert.Equal("span", exception.ParamName);
        }
    }
}