Skip to content

Commit

Permalink
added posix character class escapes to the regex expressions
Browse files Browse the repository at this point in the history
  • Loading branch information
codewitch-honey-crisis committed Aug 23, 2019
1 parent 5435d3e commit 379f5fb
Show file tree
Hide file tree
Showing 5 changed files with 313 additions and 136 deletions.
4 changes: 2 additions & 2 deletions common/GlobalAssemblyInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
// Each of these attributes may be declared only once per assembly, so only the
// current version number is listed here.
[assembly: AssemblyVersion("0.0.1.7")]
[assembly: AssemblyFileVersion("0.0.1.7")]
177 changes: 85 additions & 92 deletions fa/CharFA.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Text;

Expand Down Expand Up @@ -710,99 +711,91 @@ public static IDictionary<string, IList<CharRange>> CharacterClasses
/// <summary>
/// Builds the table of named character classes, keyed by class name.
/// </summary>
/// <remarks>
/// Apart from "letter", which is taken from <c>CharUtility.Letter</c>, every class is
/// defined over explicit ASCII ranges. The "^digit", "^space" and "^word" entries are
/// derived by passing the matching positive class to <c>CharRange.NotRanges</c>, so each
/// of them must be added after the class it is derived from.
/// </remarks>
/// <returns>A newly created dictionary mapping each class name to its list of ranges.</returns>
static IDictionary<string,IList<CharRange>> _GetCharacterClasses()
{
    var result = new Dictionary<string, IList<CharRange>>();
    result.Add("letter", new List<CharRange>(CharRange.GetRanges(CharUtility.Letter)));
    // "digit" is registered exactly once (further down, as [0-9]). Dictionary.Add throws
    // ArgumentException on a duplicate key, so a second registration would break the
    // construction of the whole table.
    result.Add("alnum", new List<CharRange> {
        new CharRange('A', 'Z'),
        new CharRange('a', 'z'),
        new CharRange('0', '9')
    });
    result.Add("alpha", new List<CharRange> {
        new CharRange('A', 'Z'),
        new CharRange('a', 'z')
    });
    result.Add("ascii", new List<CharRange> {
        new CharRange('\0', '\x7F')
    });
    result.Add("blank", new List<CharRange> {
        new CharRange(' ', ' '),
        new CharRange('\t', '\t')
    });
    result.Add("cntrl", new List<CharRange> {
        new CharRange('\0', '\x1F'),
        new CharRange('\x7F', '\x7F')
    });
    result.Add("digit", new List<CharRange> {
        new CharRange('0', '9')
    });
    result.Add("^digit", new List<CharRange>(CharRange.NotRanges(result["digit"])));
    result.Add("graph", new List<CharRange> {
        new CharRange('\x21', '\x7E')
    });
    result.Add("lower", new List<CharRange> {
        new CharRange('a', 'z')
    });
    result.Add("print", new List<CharRange> {
        new CharRange('\x20', '\x7E')
    });
    // [!"\#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~]
    result.Add("punct", new List<CharRange>(
        CharRange.GetRanges("!\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~")));
    // [ \t\r\n\v\f]
    result.Add("space", new List<CharRange>(
        CharRange.GetRanges(" \t\r\n\v\f")));
    result.Add("^space", new List<CharRange>(CharRange.NotRanges(result["space"])));
    result.Add("upper", new List<CharRange> {
        new CharRange('A', 'Z')
    });
    result.Add("word", new List<CharRange> {
        new CharRange('0', '9'),
        new CharRange('A', 'Z'),
        new CharRange('_', '_'),
        new CharRange('a', 'z')
    });
    result.Add("^word", new List<CharRange>(CharRange.NotRanges(result["word"])));
    result.Add("xdigit", new List<CharRange> {
        new CharRange('0', '9'),
        new CharRange('A', 'F'),
        new CharRange('a', 'f')
    });
    return result;
}
/// <summary>
/// Reads one logical character from the enumerator, decoding a backslash escape if one is present.
/// </summary>
/// <remarks>
/// Recognized escapes are \t, \n, \r, \0, \v, \f, \b, \x (one or two hex digits) and
/// \u (one to four hex digits). Any other escaped character is returned as itself.
/// On return the enumerator is positioned on the last character that was consumed.
/// </remarks>
/// <param name="e">An enumerator already positioned on the character to read.</param>
/// <returns>The decoded character.</returns>
/// <exception cref="ExpectingException">The input ended immediately after \x or \u.</exception>
/// <exception cref="ArgumentException">A character consumed as part of a \x or \u escape was not a hex digit.</exception>
static char _ReadRangeChar(IEnumerator<char> e)
{
    if ('\\' != e.Current)
        return e.Current;
    // A trailing backslash has nothing to escape, so it is returned literally. Reading
    // e.Current after MoveNext() has returned false is undefined for IEnumerator<T>
    // (string enumerators throw), so it must not be touched here.
    if (!e.MoveNext())
        return '\\';
    switch (e.Current)
    {
        case 't':
            return '\t';
        case 'n':
            return '\n';
        case 'r':
            return '\r';
        case '0':
            return '\0';
        case 'v':
            return '\v';
        case 'f':
            return '\f';
        case 'b':
            return '\b';
        case 'x':
            return _ReadHexEscape(e, 2, "Expecting input for escape \\x");
        case 'u':
            return _ReadHexEscape(e, 4, "Expecting input for escape \\u");
        default: // any other escaped character stands for itself
            return e.Current;
    }
}
// Reads between one and maxDigits hex digits that follow an escape introducer and returns the character with that code.
static char _ReadHexEscape(IEnumerator<char> e, int maxDigits, string expectingMessage)
{
    // at least one digit is required
    if (!e.MoveNext())
        throw new ExpectingException(expectingMessage);
    int value = _FromHexChar(e.Current);
    // The remaining digits are optional only in the sense that the input may end early:
    // a character that is available is always consumed and must be a hex digit.
    // MoveNext() is not called once maxDigits have been read.
    for (int i = 1; i < maxDigits && e.MoveNext(); ++i)
        value = value * 0x10 + _FromHexChar(e.Current);
    return unchecked((char)value);
}
/// <summary>
/// Converts a single hexadecimal digit character to its numeric value.
/// </summary>
/// <param name="hex">A character in 0-9, A-F or a-f.</param>
/// <returns>The value of the digit, from 0 to 15.</returns>
/// <exception cref="ArgumentException"><paramref name="hex"/> is not a hexadecimal digit.</exception>
static byte _FromHexChar(char hex)
{
    if (hex >= '0' && hex <= '9')
        return (byte)(hex - '0');
    if (hex >= 'A' && hex <= 'F')
        return (byte)(hex - 'A' + 10);
    if (hex >= 'a' && hex <= 'f')
        return (byte)(hex - 'a' + 10);
    throw new ArgumentException("The value was not hex.", "hex");
}


}
}
43 changes: 39 additions & 4 deletions fa/RegexCharsetExpression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,51 @@ public override CharFA<TAccept> ToFA<TAccept>(TAccept accept)
protected internal override void AppendTo(StringBuilder sb)
{
// special case for "."
if(!HasNegatedRanges && 1==Entries.Count)
if(1==Entries.Count)
{
var e = Entries[0] as RegexCharsetRangeEntry;
if(null!=e && e.First==char.MinValue && e.Last==char.MaxValue)
var dotE = Entries[0] as RegexCharsetRangeEntry;
if(!HasNegatedRanges && null !=dotE && dotE.First==char.MinValue && dotE.Last==char.MaxValue)
{
sb.Append(".");
return;
}

var cls = Entries[0] as RegexCharsetClassEntry;
switch(cls.Name)
{
case "blank":
if (!HasNegatedRanges)
sb.Append(@"\h");
return;
case "digit":
if (!HasNegatedRanges)
sb.Append(@"\d");
else
sb.Append(@"\D");
return;
case "lower":
if (!HasNegatedRanges)
sb.Append(@"\l");
return;
case "space":
if (!HasNegatedRanges)
sb.Append(@"\s");
else
sb.Append(@"\S");
return;
case "upper":
if (!HasNegatedRanges)
sb.Append(@"\u");
return;
case "word":
if (!HasNegatedRanges)
sb.Append(@"\w");
else
sb.Append(@"\W");
return;

}
}

sb.Append('[');
if (HasNegatedRanges)
sb.Append('^');
Expand Down
Loading

0 comments on commit 379f5fb

Please sign in to comment.