diff --git a/StreamRegex.Benchmarks/BufferSizeBenchmarks.cs b/StreamRegex.Benchmarks/BufferSizeBenchmarks.cs
new file mode 100644
index 0000000..3913393
--- /dev/null
+++ b/StreamRegex.Benchmarks/BufferSizeBenchmarks.cs
@@ -0,0 +1,102 @@
+using System.Text.RegularExpressions;
+using BenchmarkDotNet.Attributes;
+using StreamRegex.Extensions;
+using StreamRegex.Lib.DFA;
+using StreamRegex.Lib.NFA;
+
+namespace StreamRegex.Benchmarks;
+[MemoryDiagnoser]
+public class BufferSizeBenchmarks
+{
+    private readonly Regex _compiled;
+    private const string Pattern = "racecar";
+    private Stream _stream = new MemoryStream(); // placeholder; replaced by IterationSetup before each iteration
+    public BufferSizeBenchmarks()
+    {
+        _compiled = new Regex(Pattern, RegexOptions.Compiled);
+    }
+
+    [IterationSetup]
+    public void IterationSetup()
+    {
+        _stream = File.OpenRead(TestFileName); // reopened every iteration so each benchmark starts reading at position 0
+    }
+
+    [IterationCleanup]
+    public void IterationCleanup()
+    {
+        _stream.Dispose();
+    }
+
+    //[Params("TargetStart.txt","TargetMiddle.txt","TargetEnd.txt")]
+    [Params("175MB.txt")]
+    public string TestFileName { get; set; }
+
+    [BenchmarkCategory("Regex")]
+    [Benchmark]
+    public void CompiledRegex()
+    {
+        var content = new StreamReader(_stream).ReadToEnd();
+        if (!_compiled.IsMatch(content))
+        {
+            throw new Exception($"The regex didn't match.");
+        }
+    }
+
+    [BenchmarkCategory("Regex")]
+    [Benchmark]
+    public void RegexExtension()
+    {
+        var content = new StreamReader(_stream);
+        if (!_compiled.IsMatch(content))
+        {
+            throw new Exception($"The regex didn't match.");
+        }
+    }
+
+    [BenchmarkCategory("Contains")]
+    [Benchmark(Baseline = true)]
+
+    public void SimpleString()
+    {
+        var match = new StreamReader(_stream).ReadToEnd().IndexOf("racecar");
+        if (match == -1)
+        {
+            throw new Exception($"The regex didn't match.");
+        }
+    }
+
+    [BenchmarkCategory("Contains")]
+    [Benchmark]
+    public void StringExtension()
+    {
+        var content = new StreamReader(_stream);
+        var match = content.IndexOf("racecar");
+        if (match == -1)
+        {
+            throw new Exception($"The regex didn't match.");
+        }
+    }
+
+
+    // [Benchmark]
+    public void StateMachine()
+    {
+        var stateMachine = StateMachineFactory.CreateStateMachine(Pattern);
+        if (stateMachine.GetFirstMatchPosition(_stream) == -1)
+        {
+            throw new Exception("The regex didn't match");
+        }
+    }
+
+    // [Benchmark]
+    public void NFAStateMachine()
+    {
+        var stateMachine = NfaStateMachineFactory.CreateStateMachine(Pattern);
+        var match = stateMachine.Match(_stream);
+        if (match is null)
+        {
+            throw new Exception("The regex didn't match");
+        }
+    }
+}
\ No newline at end of file
diff --git a/StreamRegex.Benchmarks/PerformanceVsStandard.cs b/StreamRegex.Benchmarks/LargeFileBenchmarks.cs
similarity index 74%
rename from StreamRegex.Benchmarks/PerformanceVsStandard.cs
rename to StreamRegex.Benchmarks/LargeFileBenchmarks.cs
index 23a473f..c4f4b22 100644
--- a/StreamRegex.Benchmarks/PerformanceVsStandard.cs
+++ b/StreamRegex.Benchmarks/LargeFileBenchmarks.cs
@@ -32,7 +32,8 @@ public void IterationCleanup()
     [Params("175MB.txt")]
     public string TestFileName { get; set; }
 
-    [Benchmark(Baseline = true)]
+    [BenchmarkCategory("Regex")]
+    [Benchmark]
     public void CompiledRegex()
     {
         var content = new StreamReader(_stream).ReadToEnd();
@@ -41,7 +42,43 @@ public void CompiledRegex()
             throw new Exception($"The regex didn't match.");
         }
     }
+
+    [BenchmarkCategory("Regex")]
+    [Benchmark]
+    public void RegexExtension()
+    {
+        var content = new StreamReader(_stream);
+        if (!_compiled.IsMatch(content))
+        {
+            throw new Exception($"The regex didn't match.");
+        }
+    }
+
+    [BenchmarkCategory("Contains")]
+    [Benchmark(Baseline = true)]
+    public void SimpleString()
+    {
+        var match = new StreamReader(_stream).ReadToEnd().IndexOf("racecar");
+        if (match == -1)
+        {
+            throw new Exception($"The regex didn't match.");
+        }
+    }
+
+    [BenchmarkCategory("Contains")]
+    [Benchmark]
+    public void StringExtension()
+    {
+        var content = new StreamReader(_stream);
+        var match = content.IndexOf("racecar");
+        if (match == -1)
+        {
+            throw new Exception($"The regex didn't match.");
+        }
+    }
+
 
+
     // [Benchmark]
     public void StateMachine()
     {
@@ -62,14 +99,4 @@ public void NFAStateMachine()
             throw new Exception("The regex didn't match");
         }
     }
-
-    [Benchmark]
-    public void RegexExtension()
-    {
-        var content = new StreamReader(_stream);
-        if (!_compiled.IsMatch(content))
-        {
-            throw new Exception($"The regex didn't match.");
-        }
-    }
 }
\ No newline at end of file
diff --git a/StreamRegex.Benchmarks/Program.cs b/StreamRegex.Benchmarks/Program.cs
index 4d112cf..a810727 100644
--- a/StreamRegex.Benchmarks/Program.cs
+++ b/StreamRegex.Benchmarks/Program.cs
@@ -4,7 +4,7 @@
 using StreamRegex.Lib.DFA;
 using StreamRegex.Lib.NFA;
 
-// var summary = BenchmarkRunner.Run();
+var summary = BenchmarkRunner.Run();
 // NFATest();
 void ExtensionsTest()
 {
diff --git a/StreamRegex.Benchmarks/VariousPositions.cs b/StreamRegex.Benchmarks/VariousPositions.cs
new file mode 100644
index 0000000..e89f5ca
--- /dev/null
+++ b/StreamRegex.Benchmarks/VariousPositions.cs
@@ -0,0 +1,111 @@
+using System.Text;
+using System.Text.RegularExpressions;
+using BenchmarkDotNet.Attributes;
+using StreamRegex.Extensions;
+using StreamRegex.Lib.DFA;
+using StreamRegex.Lib.NFA;
+
+namespace StreamRegex.Benchmarks;
+[MemoryDiagnoser]
+
+// Benchmarks that search for "racecar" in generated in-memory data where it occurs only once, at the very end of the padding.
+public class LargeFileBenchmarks
+{
+    private readonly Regex _compiled;
+    private const string Pattern = "racecar";
+    private Stream _stream = new MemoryStream(); // placeholder; replaced by IterationSetup before each iteration
+    private const int _paddingLength = 1024 * 1024 * 100; // 100 MB of 'a' padding; a multiple of 1024 so the fill loop stops exactly at this length
+    private StringBuilder _testData = new StringBuilder();
+    public LargeFileBenchmarks()
+    {
+        while (_testData.Length < _paddingLength)
+        {
+            _testData.Append('a', 1024); // Append(char, int) writes 1024 'a' chars; Enumerable.Repeat(...) would bind to Append(object) and append its ToString()
+        }
+
+        _testData.Append(Pattern); // the pattern starts at index _paddingLength, which every benchmark below asserts
+        _compiled = new Regex(Pattern, RegexOptions.Compiled);
+    }
+
+    [IterationSetup]
+    public void IterationSetup()
+    {
+        _stream = new MemoryStream(Encoding.UTF8.GetBytes(_testData.ToString())); // fresh stream positioned at 0 for every iteration
+    }
+
+    [IterationCleanup]
+    public void IterationCleanup()
+    {
+        _stream.Dispose();
+    }
+
+    [BenchmarkCategory("Regex")]
+    [Benchmark]
+    public void CompiledRegex()
+    {
+        var content = new StreamReader(_stream).ReadToEnd();
+        var match = _compiled.Match(content);
+        if (!match.Success || match.Index != _paddingLength)
+        {
+            throw new Exception($"The regex didn't match {match.Index}.");
+        }
+    }
+
+    [BenchmarkCategory("Regex")]
+    [Benchmark]
+    public void RegexExtension()
+    {
+        var content = new StreamReader(_stream);
+        var match = _compiled.GetFirstMatch(content);
+        if (!match.Success || match.Index != _paddingLength)
+        {
+            throw new Exception($"The regex didn't match {match.Index}.");
+        }
+    }
+
+    [BenchmarkCategory("Contains")]
+    [Benchmark(Baseline = true)]
+
+    public void SimpleString()
+    {
+        var match = new StreamReader(_stream).ReadToEnd().IndexOf("racecar");
+        if (match != _paddingLength)
+        {
+            throw new Exception($"The regex didn't match {match}.");
+        }
+    }
+
+    [BenchmarkCategory("Contains")]
+    [Benchmark]
+    public void StringExtension()
+    {
+        var content = new StreamReader(_stream);
+        var match = content.IndexOf("racecar");
+        if (match != _paddingLength)
+        {
+            throw new Exception($"The regex didn't match {match}.");
+        }
+    }
+
+
+    // [Benchmark]
+    public void StateMachine()
+    {
+        var stateMachine = StateMachineFactory.CreateStateMachine(Pattern);
+        if (stateMachine.GetFirstMatchPosition(_stream) == -1)
+        {
+            throw new Exception("The regex didn't match");
+        }
+    }
+
+    // [Benchmark]
+    public void NFAStateMachine()
+    {
+        var stateMachine = NfaStateMachineFactory.CreateStateMachine(Pattern);
+        var match = stateMachine.Match(_stream);
+        if (match is null)
+        {
+            throw new Exception("The regex didn't match");
+        }
+    }
+}
\ No newline at end of file
diff --git a/StreamRegex.Extensions/SlidingBufferExtensions.cs b/StreamRegex.Extensions/SlidingBufferExtensions.cs
index 2dcea23..6509ca3 100644
--- a/StreamRegex.Extensions/SlidingBufferExtensions.cs
+++ b/StreamRegex.Extensions/SlidingBufferExtensions.cs
@@ -10,9 +10,9 @@ public static class SlidingBufferExtensions
     ///
     ///
     ///
-    public static bool Contains(this Stream toMatch, string target, StringComparison comparison, SlidingBufferOptions? options = null)
+    public static bool Contains(this Stream toMatch, string target, StringComparison? comparison = null, SlidingBufferOptions? options = null)
     {
-        return new StreamReader(toMatch).IsMatch(contentChunk => contentChunk.Contains(target, comparison), options);
+        return new StreamReader(toMatch).Contains(target, comparison, options);
     }
 
     ///
@@ -23,9 +23,11 @@ public static bool Contains(this Stream toMatch, string target, StringComparison
     ///
     ///
     ///
-    public static bool Contains(this StreamReader toMatch, string target, StringComparison comparison, SlidingBufferOptions? options = null)
+    public static bool Contains(this StreamReader toMatch, string target, StringComparison? comparison = null, SlidingBufferOptions? options = null)
     {
-        return toMatch.IsMatch(contentChunk => contentChunk.Contains(target, comparison), options);
+        return comparison is { } notNullComparison ?
+            toMatch.IsMatch(contentChunk => contentChunk.Contains(target, notNullComparison), options) :
+            toMatch.IsMatch(contentChunk => contentChunk.Contains(target), options);
     }
 
     ///
diff --git a/StreamRegex.Extensions/SlidingBufferMatchCollection.cs b/StreamRegex.Extensions/SlidingBufferMatchCollection.cs
index 44b74bc..6b90b08 100644
--- a/StreamRegex.Extensions/SlidingBufferMatchCollection.cs
+++ b/StreamRegex.Extensions/SlidingBufferMatchCollection.cs
@@ -5,7 +5,7 @@
 namespace StreamRegex.Extensions;
 
 ///
-/// A collection holding unique .
+/// A collection holding unique matches for a single resource. The matches are Records which are deduplicated automatically.
 ///
 public class SlidingBufferMatchCollection : IEnumerable where T : SlidingBufferMatch
 {
@@ -46,15 +46,15 @@ public void AddMatches(IEnumerable matchCollection)
     }
 
     ///
-    /// Update the index position of the matches in this collection by a specific offset and return the collection.
+    /// Update the index position of the matches in this collection by a specific offset and return the modified collection. Does not make a copy.
     ///
     /// The offset to apply
     /// This collection with the matches modified
     public SlidingBufferMatchCollection WithOffset(long offset)
     {
-        foreach (var SlidingBufferMatch in _collection)
+        foreach (var slidingBufferMatch in _collection)
         {
-            SlidingBufferMatch.Index += offset;
+            slidingBufferMatch.Index += offset;
         }
 
         return this;