Why is HashSet<string>(StringComparer.InvariantCultureIgnoreCase)
(Perf_HashSet_CaseInsensitive
) perf'ing (performing) so bad? The Perf_HashSet
workaround is perf'ing 20x comparatively.
// perf Contains(): 1M iterations, 25 size (unsuccessful lookup)
Test Duration (ms)
Perf_HashSet 43 <-
Perf_Dictionary 49
Perf_HybridDictionary 63
Perf_ListDictionary 223
Perf_List 225
Perf_HashSet_CaseInsensitive 903 <-
code:
// <TargetFramework>net5.0</TargetFramework>
[TestFixture]
public class ContainsPerfTests
{
private const int iterations = 1_000_000;
private const int size = 25;
[Test]
[Explicit]
public void Perf_List()
{
var list = new List<string>();
for (int i = 0; i < size; i )
{
list.Add(Guid.NewGuid().ToString().ToLowerInvariant());
}
var x = Guid.NewGuid().ToString();
var sw = Stopwatch.StartNew();
for (int i = 0; i < iterations; i )
{
var contains = list.Contains(x.ToLowerInvariant());
}
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds);
}
[Test]
[Explicit]
public void Perf_HashSet()
{
var hashSet = new HashSet<string>();
for (int i = 0; i < size; i )
{
hashSet.Add(Guid.NewGuid().ToString().ToLowerInvariant());
}
var x = Guid.NewGuid().ToString();
var sw = Stopwatch.StartNew();
for (int i = 0; i < iterations; i )
{
var contains = hashSet.Contains(x.ToLowerInvariant());
}
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds);
}
[Test]
[Explicit]
public void Perf_HashSet_CaseInsensitive()
{
var hashSetCaseInsensitive = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);
for (int i = 0; i < size; i )
{
hashSetCaseInsensitive.Add(Guid.NewGuid().ToString().ToLowerInvariant());
}
var x = Guid.NewGuid().ToString();
var sw = Stopwatch.StartNew();
for (int i = 0; i < iterations; i )
{
var contains = hashSetCaseInsensitive.Contains(x);
}
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds);
}
[Test]
[Explicit]
public void Perf_Dictionary()
{
var dictionary = new Dictionary<string, bool>();
for (int i = 0; i < size; i )
{
dictionary.Add(Guid.NewGuid().ToString().ToLowerInvariant(), false);
}
var x = Guid.NewGuid().ToString();
var sw = Stopwatch.StartNew();
for (int i = 0; i < iterations; i )
{
var contains = dictionary.ContainsKey(x.ToLowerInvariant());
}
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds);
}
[Test]
[Explicit]
public void Perf_HybridDictionary()
{
var hybridDictionary = new HybridDictionary(caseInsensitive: true);
for (int i = 0; i < size; i )
{
hybridDictionary.Add(Guid.NewGuid().ToString().ToLowerInvariant(), null);
}
var x = Guid.NewGuid().ToString();
var sw = Stopwatch.StartNew();
for (int i = 0; i < iterations; i )
{
var contains = hybridDictionary.Contains(x);
}
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds);
}
[Test]
[Explicit]
public void Perf_ListDictionary()
{
var listDictionary = new ListDictionary();
for (int i = 0; i < size; i )
{
listDictionary.Add(Guid.NewGuid().ToString().ToLowerInvariant(), null);
}
var x = Guid.NewGuid().ToString();
var sw = Stopwatch.StartNew();
for (int i = 0; i < iterations; i )
{
var contains = listDictionary.Contains(x.ToLowerInvariant());
}
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds);
}
}
CodePudding user response:
The main reason for this big difference in performance is because you are using a culture-sensitive comparison for the case-insensitive hash set, whereas the case-sensitive hash set uses an ordinal one.
Without passing the hash set any equality comparers, the hash set uses the default equality comparer of that type, which calls the Equals
method. Notice that String.Equals
performs an ordinal (case-sensitive and culture-insensitive) comparison.
Whereas StringComparer.InvariantCultureIgnoreCase
:
performs a case-insensitive string comparison using the word comparison rules of the invariant culture.
It is important to note that "invariant culture" is not the same as "culture-insensitive", or "ordinal":
Console.WriteLine(StringComparer.InvariantCultureIgnoreCase.Equals( "ss", "ß")); // true Console.WriteLine(StringComparer.OrdinalIgnoreCase.Equals( "ss", "ß")); // false
The invariant culture still has collation rules, and looking up and applying those rules takes time.
Changing InvariantCultureIgnoreCase
to OrdinalIgnoreCase
should make the execution time almost the same. That doesn't mean there aren't other problems with your benchmark, as the comments have pointed out.