Skip to content

Commit

Permalink
Fix edge-case of ASCII set after Notonelazy that doesn't overlap with target
Browse files Browse the repository at this point in the history
  • Loading branch information
MihaZupan committed Dec 2, 2022
1 parent 866b27c commit 48179aa
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,6 @@ private static string EmitIndexOfAnyValuesOrLiteral(ReadOnlySpan<char> chars, Di
private static string EmitIndexOfAnyValues(char[] asciiChars, Dictionary<string, string[]> requiredHelpers)
{
Debug.Assert(RegexCharClass.IsAscii(asciiChars));
Debug.Assert(asciiChars.AsSpan().SequenceEqual(asciiChars.OrderBy(c => c).ToArray()));

// The set of ASCII characters can be represented as a 128-bit bitmap. Use the 16-byte hex string as the key.
byte[] bitmap = new byte[16];
Expand All @@ -404,6 +403,8 @@ private static string EmitIndexOfAnyValues(char[] asciiChars, Dictionary<string,

if (!requiredHelpers.ContainsKey(helperName))
{
Array.Sort(asciiChars);

requiredHelpers.Add(helperName, new string[]
{
$"internal static readonly IndexOfAnyValues<char> {fieldName} = IndexOfAnyValues.Create({Literal(new string(asciiChars))});",
Expand Down Expand Up @@ -3168,8 +3169,8 @@ literal.SetChars is not null ||
}
else if (literal.AsciiChars is not null) // set of only ASCII characters
{
overlap = literal.AsciiChars.Contains(node.Ch);
char[] asciiChars = literal.AsciiChars;
overlap = asciiChars.Contains(node.Ch);
if (!overlap)
{
Debug.Assert(node.Ch < 128);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3399,7 +3399,7 @@ node.Kind is RegexNodeKind.Notonelazy &&
!literal.Negated && // not negated; can't search for both the node.Ch and a negated subsequent char with an IndexOf* method
(literal.String is not null ||
literal.SetChars is not null ||
literal.AsciiChars is not null ||
(literal.AsciiChars is not null && node.Ch < 128) || // for ASCII sets, only allow when the target can be efficiently included in the set
literal.Range.LowInclusive == literal.Range.HighInclusive ||
(literal.Range.LowInclusive <= node.Ch && node.Ch <= literal.Range.HighInclusive))) // for ranges, only allow when the range overlaps with the target, since there's no accelerated way to search for the union
{
Expand Down Expand Up @@ -3474,8 +3474,15 @@ literal.AsciiChars is not null ||
}
else if (literal.AsciiChars is not null) // set of only ASCII characters
{
overlap = literal.AsciiChars.AsSpan().Contains(node.Ch);
LoadIndexOfAnyValues(literal.AsciiChars);
char[] asciiChars = literal.AsciiChars;
overlap = asciiChars.AsSpan().Contains(node.Ch);
if (!overlap)
{
Debug.Assert(node.Ch < 128);
Array.Resize(ref asciiChars, asciiChars.Length + 1);
asciiChars[asciiChars.Length - 1] = node.Ch;
}
LoadIndexOfAnyValues(asciiChars);
Call(s_spanIndexOfAnyIndexOfAnyValues);
}
else if (literal.Range.LowInclusive == literal.Range.HighInclusive) // single char from a RegexNode.One
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,9 @@ public static IEnumerable<object[]> Match_MemberData()
yield return (@"a[^c]*?[bcdef]", "xyza12345e6789", lineOption, 0, 14, true, "a12345e");
yield return (@"a[^b]*?[bcdef]", "xyza12345f6789", lineOption, 0, 14, true, "a12345f");
yield return (@"a[^c]*?[bcdef]", "xyza12345g6789", lineOption, 0, 14, false, "");

yield return ("a[^b]*?[cdefgz]", "xyza123bc4", lineOption, 0, 10, false, "");
yield return ("a[^b]*?[bdefgz]", "xyza123bc4", lineOption, 0, 10, true, "a123b");
}

// Nested loops
Expand Down

0 comments on commit 48179aa

Please sign in to comment.