-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFnMatch.cpp
149 lines (133 loc) · 4.33 KB
/
FnMatch.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#include "FnMatch.h"
#include <assert.h>
#if 1
// Implementation with optimized speed (6.5x times faster in my dataset by than original naive implementation)
__declspec(noinline) // noinline is added to help CPU profiling in release version
bool CFnMatch::Match(const std::string_view text, const std::string_view pattern)
{
const char* pText = text.data();
const char* const pTextEnd = pText + text.size();
const char* pPattern = pattern.data();
const char* const pPatternEnd = pPattern + pattern.size();
const char* pPatternAfterAsterisk = nullptr;
const char* pAsteriskMatchEnd = nullptr;
while (pText < pTextEnd)
{
if (pPattern < pPatternEnd && *pPattern == '*')
{
pPatternAfterAsterisk = ++pPattern;
pAsteriskMatchEnd = pText;
// OPTIONAL: Speedup the loop (optional block with quick forward lookup):
{
while (pPattern < pPatternEnd && *pPattern == '*')
{
++pPattern;
++pPatternAfterAsterisk;
}
if (pPattern < pPatternEnd && *pPattern != '?')
{
const char* const p = static_cast<const char*>(memchr(pText, *pPattern, pTextEnd - pText));
if (p == nullptr)
{
return false;
}
pAsteriskMatchEnd = p;
pText = p + 1;
++pPattern;
}
}
continue;
}
else if (pPattern < pPatternEnd && (
*pPattern == '?' ||
*pPattern == *pText
))
{
++pText;
++pPattern;
continue;
}
else if (pPatternAfterAsterisk == nullptr)
{
// Characters do not match or pattern is used up
// and '*' was not met in the pattern before
return false;
}
else
{
// Backtrack and match one more character by '*'
pPattern = pPatternAfterAsterisk;
pText = ++pAsteriskMatchEnd;
// OPTIONAL: Speedup the loop (optional block with quick forward lookup):
{
if (pPattern < pPatternEnd && *pPattern != '?')
{
assert(*pPattern != '*' && "This is guaranteed by the speedup block above");
const char* const p = static_cast<const char*>(memchr(pText, *pPattern, pTextEnd - pText));
if (p == nullptr)
{
return false;
}
pAsteriskMatchEnd = p;
pText = p + 1;
++pPattern;
}
}
}
}
for (; pPattern < pPatternEnd; ++pPattern)
{
if (*pPattern != '*')
{
return false;
}
}
return true;
}
#else
// Original implementation
__declspec(noinline) // noinline is added to help CPU profiling in release version
bool CFnMatch::Match(const std::string_view text, const std::string_view pattern)
{
size_t patternPos = 0;
size_t textPos = 0;
size_t asteriskMatchEndTextPos = 0;
ptrdiff_t asteriskPatternPos = -1;
while (textPos < text.size())
{
if (patternPos < pattern.size() && pattern[patternPos] == '*')
{
asteriskPatternPos = patternPos;
asteriskMatchEndTextPos = textPos;
++patternPos;
}
else if (patternPos < pattern.size() && (
pattern[patternPos] == '?' ||
pattern[patternPos] == text[textPos]
))
{
++textPos;
++patternPos;
}
else if (asteriskPatternPos == -1)
{
// Characters do not match or pattern is used up
// and '*' was not met in the pattern before
return false;
}
else
{
// Backtrack and match one more character by '*'
patternPos = asteriskPatternPos + 1;
++asteriskMatchEndTextPos;
textPos = asteriskMatchEndTextPos;
}
}
// Rest of the pattern can contain only asterisks
if (pattern.find_first_not_of('*', patternPos) != pattern.npos)
{
return false;
}
return true;
}
#endif