File: Utilities\BloomFilterTests.cs
Web Access
Project: ..\..\..\src\EditorFeatures\Test\Microsoft.CodeAnalysis.EditorFeatures.UnitTests.csproj (Microsoft.CodeAnalysis.EditorFeatures.UnitTests)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
#nullable disable
 
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using Microsoft.CodeAnalysis.Shared.Utilities;
using Roslyn.Utilities;
using Xunit;
 
namespace Microsoft.CodeAnalysis.Editor.UnitTests.Utilities
{
    public class BloomFilterTests
    {
        private static IEnumerable<string> GenerateStrings(int count)
        {
            for (var i = 1; i <= count; i++)
            {
                yield return GenerateString(i);
            }
        }
 
        private static string GenerateString(int value)
        {
            const string Alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
            var builder = new StringBuilder();
 
            while (value > 0)
            {
                var v = value % Alphabet.Length;
                var c = Alphabet[v];
                builder.Append(c);
                value /= Alphabet.Length;
            }
 
            return builder.ToString();
        }
 
        private static void Test(bool isCaseSensitive)
        {
            var comparer = isCaseSensitive ? StringComparer.Ordinal : StringComparer.OrdinalIgnoreCase;
            var strings = GenerateStrings(2000).Skip(500).Take(1000).ToSet(comparer);
            var testStrings = GenerateStrings(100000);
 
            for (var d = 0.1; d >= 0.0001; d /= 10)
            {
                var filter = new BloomFilter(strings.Count, d, isCaseSensitive);
                filter.AddRange(strings);
 
                var correctCount = 0.0;
                var incorrectCount = 0.0;
                foreach (var test in testStrings)
                {
                    var actualContains = strings.Contains(test);
                    var filterContains = filter.ProbablyContains(test);
 
                    if (!filterContains)
                    {
                        // if the filter says no, then it can't be in the real set.
                        Assert.False(actualContains);
                    }
 
                    if (actualContains == filterContains)
                    {
                        correctCount++;
                    }
                    else
                    {
                        incorrectCount++;
                    }
                }
 
                var falsePositivePercentage = incorrectCount / (correctCount + incorrectCount);
                Assert.True(falsePositivePercentage < (d * 1.5), string.Format("falsePositivePercentage={0}, d={1}", falsePositivePercentage, d));
            }
        }
 
        [Fact]
        public void Test1()
            => Test(isCaseSensitive: true);
 
        [Fact]
        public void TestInsensitive()
            => Test(isCaseSensitive: false);
 
        [Fact]
        public void TestEmpty()
        {
            for (var d = 0.1; d >= 0.0001; d /= 10)
            {
                var filter = new BloomFilter(0, d, isCaseSensitive: true);
                Assert.False(filter.ProbablyContains(string.Empty));
                Assert.False(filter.ProbablyContains("a"));
                Assert.False(filter.ProbablyContains("b"));
                Assert.False(filter.ProbablyContains("c"));
 
                var testStrings = GenerateStrings(100000);
                foreach (var test in testStrings)
                {
                    Assert.False(filter.ProbablyContains(test));
                }
            }
        }
 
        [Fact]
        public void TestSerialization()
        {
            var stream = new MemoryStream();
            var bloomFilter = new BloomFilter(0.001, false, new[] { "Hello, World" });
 
            using (var writer = new ObjectWriter(stream, leaveOpen: true))
            {
                bloomFilter.WriteTo(writer);
            }
 
            stream.Position = 0;
 
            using var reader = ObjectReader.TryGetReader(stream);
            var rehydratedFilter = BloomFilter.ReadFrom(reader);
            Assert.True(bloomFilter.IsEquivalent(rehydratedFilter));
        }
 
        [Fact]
        public void TestSerialization2()
        {
            var stream = new MemoryStream();
            var bloomFilter = new BloomFilter(0.001, new[] { "Hello, World" }, new long[] { long.MaxValue, -1, 0, 1, long.MinValue });
 
            using (var writer = new ObjectWriter(stream, leaveOpen: true))
            {
                bloomFilter.WriteTo(writer);
            }
 
            stream.Position = 0;
 
            using var reader = ObjectReader.TryGetReader(stream);
            var rehydratedFilter = BloomFilter.ReadFrom(reader);
            Assert.True(bloomFilter.IsEquivalent(rehydratedFilter));
        }
 
        [Fact]
        public void TestInt64()
        {
            var longs = CreateLongs(GenerateStrings(2000).Skip(500).Take(1000).Select(s => s.GetHashCode()).ToList());
            var testLongs = CreateLongs(GenerateStrings(100000).Select(s => s.GetHashCode()).ToList());
 
            for (var d = 0.1; d >= 0.0001; d /= 10)
            {
                var filter = new BloomFilter(d, new string[] { }, longs);
 
                var correctCount = 0.0;
                var incorrectCount = 0.0;
                foreach (var test in testLongs)
                {
                    var actualContains = longs.Contains(test);
                    var filterContains = filter.ProbablyContains(test);
 
                    if (!filterContains)
                    {
                        // if the filter says no, then it can't be in the real set.
                        Assert.False(actualContains);
                    }
 
                    if (actualContains == filterContains)
                    {
                        correctCount++;
                    }
                    else
                    {
                        incorrectCount++;
                    }
                }
 
                var falsePositivePercentage = incorrectCount / (correctCount + incorrectCount);
                Assert.True(falsePositivePercentage < (d * 1.5), string.Format("falsePositivePercentage={0}, d={1}", falsePositivePercentage, d));
            }
        }
 
        private static HashSet<long> CreateLongs(List<int> ints)
        {
            var result = new HashSet<long>();
 
            for (var i = 0; i < ints.Count; i += 2)
            {
                var long1 = ((long)ints[i]) << 32;
                var long2 = (long)ints[i + 1];
 
                result.Add(long1 | long2);
            }
 
            return result;
        }
    }
}