Skip to content

Commit

Permalink
Use ranges for short sets where applicable, avoid negated IndexOfAnyValues primary sets
Browse files Browse the repository at this point in the history
  • Loading branch information
MihaZupan authored and Miha Zupan committed Dec 7, 2022
1 parent bc8a994 commit aa01724
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 140 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -887,19 +887,21 @@ void EmitFixedSet_LeftToRight()
};

string indexOf =
primarySet.Chars is not null ? primarySet.Chars!.Length switch
primarySet.Chars is not null ? (primarySet.Negated, primarySet.Chars.Length) switch
{
1 => $"{span}.IndexOf({Literal(primarySet.Chars[0])})",
2 => $"{span}.IndexOfAny({Literal(primarySet.Chars[0])}, {Literal(primarySet.Chars[1])})",
3 => $"{span}.IndexOfAny({Literal(primarySet.Chars[0])}, {Literal(primarySet.Chars[1])}, {Literal(primarySet.Chars[2])})",
_ => $"{span}.IndexOfAny({EmitIndexOfAnyValuesOrLiteral(primarySet.Chars, requiredHelpers)})",
(false, 1) => $"{span}.IndexOf({Literal(primarySet.Chars[0])})",
(false, 2) => $"{span}.IndexOfAny({Literal(primarySet.Chars[0])}, {Literal(primarySet.Chars[1])})",
(false, 3) => $"{span}.IndexOfAny({Literal(primarySet.Chars[0])}, {Literal(primarySet.Chars[1])}, {Literal(primarySet.Chars[2])})",
(false, _) => $"{span}.IndexOfAny({EmitIndexOfAnyValuesOrLiteral(primarySet.Chars, requiredHelpers)})",
(true, 1) => $"{span}.IndexOfAnyExcept({Literal(primarySet.Chars[0])})",
_ => throw new InvalidOperationException("Expected that negated sets will have at most 1 value in Chars."),
} :
primarySet.AsciiSet is not null ? primarySet.AsciiSet.Value.Negated switch
primarySet.AsciiSet is not null ? primarySet.Negated switch
{
false => $"{span}.IndexOfAny({EmitIndexOfAnyValues(primarySet.AsciiSet.Value.Chars, requiredHelpers)})",
true => $"{span}.IndexOfAnyExcept({EmitIndexOfAnyValues(primarySet.AsciiSet.Value.Chars, requiredHelpers)})",
false => $"{span}.IndexOfAny({EmitIndexOfAnyValues(primarySet.AsciiSet, requiredHelpers)})",
_ => throw new InvalidOperationException("Expected AsciiSets not to be negated."),
} :
(primarySet.Range.Value.LowInclusive == primarySet.Range.Value.HighInclusive, primarySet.Range.Value.Negated) switch
(primarySet.Range.Value.LowInclusive == primarySet.Range.Value.HighInclusive, primarySet.Negated) switch
{
(false, false) => $"{span}.IndexOfAnyInRange({Literal(primarySet.Range.Value.LowInclusive)}, {Literal(primarySet.Range.Value.HighInclusive)})",
(true, false) => $"{span}.IndexOf({Literal(primarySet.Range.Value.LowInclusive)})",
Expand Down Expand Up @@ -4440,8 +4442,22 @@ private static bool TryEmitIndexOf(
bool negated = RegexCharClass.IsNegated(node.Str) ^ negate;

Span<char> setChars = stackalloc char[5]; // current max that's vectorized
int setCharsCount;
if ((setCharsCount = RegexCharClass.GetSetChars(node.Str, setChars)) > 0)
int setCharsCount = RegexCharClass.GetSetChars(node.Str, setChars);

// Prefer IndexOfAnyInRange over IndexOfAny for sets of 2-5 values that fit in a single range
if (setCharsCount != 1 && RegexCharClass.TryGetSingleRange(node.Str, out char lowInclusive, out char highInclusive))
{
string indexOfAnyInRangeName = !negated ?
"IndexOfAnyInRange" :
"IndexOfAnyExceptInRange";

indexOfExpr = $"{last}{indexOfAnyInRangeName}({Literal(lowInclusive)}, {Literal(highInclusive)})";

literalLength = 1;
return true;
}

if (setCharsCount > 0)
{
(string indexOfName, string indexOfAnyName) = !negated ?
("IndexOf", "IndexOfAny") :
Expand All @@ -4460,18 +4476,6 @@ private static bool TryEmitIndexOf(
return true;
}

if (RegexCharClass.TryGetSingleRange(node.Str, out char lowInclusive, out char highInclusive))
{
string indexOfAnyInRangeName = !negated ?
"IndexOfAnyInRange" :
"IndexOfAnyExceptInRange";

indexOfExpr = $"{last}{indexOfAnyInRangeName}({Literal(lowInclusive)}, {Literal(highInclusive)})";

literalLength = 1;
return true;
}

if (RegexCharClass.TryGetAsciiSetChars(node.Str, out char[]? asciiChars))
{
string indexOfAnyName = !negated ?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -888,12 +888,13 @@ void EmitFixedSet_LeftToRight()

if (primarySet.Chars is not null)
{
Debug.Assert(!primarySet.Negated || primarySet.Chars.Length == 1);
switch (primarySet.Chars.Length)
{
case 1:
// tmp = ...IndexOf(setChars[0]);
// tmp = ...IndexOf{AnyExcept}(setChars[0]);
Ldc(primarySet.Chars[0]);
Call(s_spanIndexOfChar);
Call(primarySet.Negated ? s_spanIndexOfAnyExceptChar : s_spanIndexOfChar);
break;

case 2:
Expand All @@ -920,23 +921,24 @@ void EmitFixedSet_LeftToRight()
}
else if (primarySet.AsciiSet is not null)
{
LoadIndexOfAnyValues(primarySet.AsciiSet.Value.Chars);
Call(primarySet.AsciiSet.Value.Negated ? s_spanIndexOfAnyExceptIndexOfAnyValues : s_spanIndexOfAnyIndexOfAnyValues);
Debug.Assert(!primarySet.Negated);
LoadIndexOfAnyValues(primarySet.AsciiSet);
Call(s_spanIndexOfAnyIndexOfAnyValues);
}
else
{
if (primarySet.Range!.Value.LowInclusive == primarySet.Range.Value.HighInclusive)
{
// tmp = ...IndexOf{AnyExcept}(low);
Ldc(primarySet.Range.Value.LowInclusive);
Call(primarySet.Range.Value.Negated ? s_spanIndexOfAnyExceptChar : s_spanIndexOfChar);
Call(primarySet.Negated ? s_spanIndexOfAnyExceptChar : s_spanIndexOfChar);
}
else
{
// tmp = ...IndexOfAny{Except}InRange(low, high);
Ldc(primarySet.Range.Value.LowInclusive);
Ldc(primarySet.Range.Value.HighInclusive);
Call(primarySet.Range.Value.Negated ? s_spanIndexOfAnyExceptInRange : s_spanIndexOfAnyInRange);
Call(primarySet.Negated ? s_spanIndexOfAnyExceptInRange : s_spanIndexOfAnyInRange);
}
}

Expand Down Expand Up @@ -1060,7 +1062,7 @@ void EmitFixedSet_RightToLeft()
RegexFindOptimizations.FixedDistanceSet set = _regexTree.FindOptimizations.FixedDistanceSets![0];
Debug.Assert(set.Distance == 0);

if (set.Chars is { Length: 1 })
if (set.Chars is { Length: 1 } && !set.Negated)
{
// pos = inputSpan.Slice(0, pos).LastIndexOf(set.Chars[0]);
Ldloca(inputSpan);
Expand Down Expand Up @@ -4999,10 +5001,40 @@ void EmitIndexOf(RegexNode node, bool useLast, bool negate)
{
bool negated = RegexCharClass.IsNegated(node.Str) ^ negate;

// IndexOfAny{Except}(ch1, ...)
Span<char> setChars = stackalloc char[5]; // current max that's vectorized
int setCharsCount;
if ((setCharsCount = RegexCharClass.GetSetChars(node.Str, setChars)) > 0)
int setCharsCount = RegexCharClass.GetSetChars(node.Str, setChars);

// IndexOfAny{Except}InRange
// Prefer IndexOfAnyInRange over IndexOfAny for sets of 2-5 values that fit in a single range
if (setCharsCount != 1 && RegexCharClass.TryGetSingleRange(node.Str, out char lowInclusive, out char highInclusive))
{
if (lowInclusive == highInclusive)
{
Ldc(lowInclusive);
Call((useLast, negated) switch
{
(false, false) => s_spanIndexOfChar,
(false, true) => s_spanIndexOfAnyExceptChar,
(true, false) => s_spanLastIndexOfChar,
(true, true) => s_spanLastIndexOfAnyExceptChar,
});
return;
}

Ldc(lowInclusive);
Ldc(highInclusive);
Call((useLast, negated) switch
{
(false, false) => s_spanIndexOfAnyInRange,
(false, true) => s_spanIndexOfAnyExceptInRange,
(true, false) => s_spanLastIndexOfAnyInRange,
(true, true) => s_spanLastIndexOfAnyExceptInRange,
});
return;
}

// IndexOfAny{Except}(ch1, ...)
if (setCharsCount > 0)
{
setChars = setChars.Slice(0, setCharsCount);
switch (setChars.Length)
Expand Down Expand Up @@ -5057,34 +5089,6 @@ void EmitIndexOf(RegexNode node, bool useLast, bool negate)
}
}

// IndexOfAny{Except}InRange
if (RegexCharClass.TryGetSingleRange(node.Str, out char lowInclusive, out char highInclusive))
{
if (lowInclusive == highInclusive)
{
Ldc(lowInclusive);
Call((useLast, negated) switch
{
(false, false) => s_spanIndexOfChar,
(false, true) => s_spanIndexOfAnyExceptChar,
(true, false) => s_spanLastIndexOfChar,
(true, true) => s_spanLastIndexOfAnyExceptChar,
});
return;
}

Ldc(lowInclusive);
Ldc(highInclusive);
Call((useLast, negated) switch
{
(false, false) => s_spanIndexOfAnyInRange,
(false, true) => s_spanIndexOfAnyExceptInRange,
(true, false) => s_spanLastIndexOfAnyInRange,
(true, true) => s_spanLastIndexOfAnyExceptInRange,
});
return;
}

// IndexOfAny{Except}(IndexOfAnyValues<char>)
if (RegexCharClass.TryGetAsciiSetChars(node.Str, out char[]? asciiChars))
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,8 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
// the set's characteristics.
if (!compiled &&
fixedDistanceSets.Count == 1 &&
fixedDistanceSets[0].Chars is { Length: 1 })
fixedDistanceSets[0].Chars is { Length: 1 } &&
!fixedDistanceSets[0].Negated)
{
FixedDistanceLiteral = (fixedDistanceSets[0].Chars![0], null, fixedDistanceSets[0].Distance);
FindMode = FindNextStartingPositionMode.FixedDistanceChar_LeftToRight;
Expand Down Expand Up @@ -241,14 +242,16 @@ public FixedDistanceSet(char[]? chars, string set, int distance)

/// <summary>The character class description.</summary>
public string Set;
/// <summary>Whether the <see cref="Set"/> is negated.</summary>
public bool Negated;
/// <summary>Small list of all of the characters that make up the set, if known; otherwise, null.</summary>
public char[]? Chars;
/// <summary>The distance of the set from the beginning of the match.</summary>
public int Distance;
/// <summary>As an alternative to <see cref="Chars"/>, a description of the single range the set represents, if it does.</summary>
public (char LowInclusive, char HighInclusive, bool Negated)? Range;
public (char LowInclusive, char HighInclusive)? Range;
/// <summary>As an alternative to <see cref="Chars"/>, a description of the set of ASCII characters it represents, if it does.</summary>
public (char[] Chars, bool Negated)? AsciiSet;
public char[]? AsciiSet;
}

/// <summary>When in literal after set loop node, gets the literal to search for and the RegexNode representing the leading loop.</summary>
Expand All @@ -273,7 +276,7 @@ private static (string String, int Distance)? FindFixedDistanceString(List<Fixed
for (int i = 0; i < fixedDistanceSets.Count + 1; i++)
{
char[]? chars = i < fixedDistanceSets.Count ? fixedDistanceSets[i].Chars : null;
bool invalidChars = chars is not { Length: 1 };
bool invalidChars = chars is not { Length: 1 } || fixedDistanceSets[i].Negated;

// If the current set ends a sequence (or we've walked off the end), see whether
// what we've gathered constitutes a valid string, and if it's better than the
Expand Down Expand Up @@ -554,7 +557,7 @@ public bool TryFindNextStartingPositionLeftToRight(ReadOnlySpan<char> textSpan,
string set = primarySet.Set;

ReadOnlySpan<char> span = textSpan.Slice(pos);
if (chars is not null)
if (chars is not null && !primarySet.Negated)
{
int i = span.IndexOfAny(chars);
if (i >= 0)
Expand Down Expand Up @@ -621,7 +624,7 @@ public bool TryFindNextStartingPositionLeftToRight(ReadOnlySpan<char> textSpan,

int endMinusRequiredLength = textSpan.Length - Math.Max(1, MinRequiredLength);

if (primarySet.Chars is not null)
if (primarySet.Chars is not null && !primarySet.Negated)
{
for (int inputPosition = pos; inputPosition <= endMinusRequiredLength; inputPosition++)
{
Expand Down
Loading

0 comments on commit aa01724

Please sign in to comment.