Skip to content

Commit

Permalink
Rewrite handling of cleaning up BBCode in a vote line.
Browse files Browse the repository at this point in the history
Refactored to separate out handling of stripping pre-content markup, and
normalizing in-content markup.
Rewrote the normalizing code because the old version gave incorrect results.
The new version is also faster (about twice as fast).
  • Loading branch information
Kinematics committed Dec 5, 2015
1 parent 00e0f62 commit cb2f7ce
Showing 1 changed file with 129 additions and 61 deletions.
190 changes: 129 additions & 61 deletions TallyCore/Votes/VoteString.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System;
using System.Linq;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using NetTally.Utility;

Expand Down Expand Up @@ -42,20 +43,14 @@ private static class VoteComponents
// Regex for the pre-content area of a vote line, that will only match if there are no BBCode tags in that area of the vote line.
static readonly Regex precontentRegex = new Regex(@"^(?:[\s-]*)\[[xX✓✔1-9]\](?!\s*\[/(?:[bui]|color)\])(?!(?:\s*\[(?:[bui]|color=[^]]+)\])+\s*\[(?![bui]|color=[^]]+|url=[^]]+)[^]]+\])");

// Regex for any opening or closing BBCode tag.
static readonly Regex allBBCodeRegex = new Regex(@"(\[(?:/)?(?:b|i|u|color)(?(?<=\[color)=[^]]+)\])");
// Regex for any opening BBCode tag.
static readonly Regex openBBCodeRegex = new Regex(@"\[(b|i|u|color=[^]]+)\]");
static readonly Regex openBBCodeRegex = new Regex(@"^\[(b|i|u|color)(?(?<=\[color)=[^]]+)\]");
// Regex for any closing BBCode tag.
static readonly Regex closeBBCodeRegex = new Regex(@"\[/(b|i|u|color)\]");
// Regex to extract a tag from a BBCode element.
static readonly Regex tagRegex = new Regex(@"\[/?(?<tag>[biu](?=\])|(?<=/)color(?=\])|color(?==[^]]+))(=[^]]+)?\]");
// Regexes for the indicated closing tags.
static readonly Dictionary<string, Regex> closeTagRegexes = new Dictionary<string, Regex>(StringComparer.OrdinalIgnoreCase)
{
["b"] = new Regex(@"\[/b\]"),
["i"] = new Regex(@"\[/i\]"),
["u"] = new Regex(@"\[/u\]"),
["color"] = new Regex(@"\[/color\]"),
};
static readonly Regex closeBBCodeRegex = new Regex(@"^\[/(b|i|u|color)\]");

static readonly Dictionary<string, int> countTags = new Dictionary<string, int>() {["b"] = 0,["i"] = 0,["u"] = 0,["color"] = 0 };
#endregion

#region Cleanup functions
Expand Down Expand Up @@ -97,81 +92,154 @@ public static string DeUrlContent(string contents)
/// <returns>Returns the vote line cleaned up of BBCode.</returns>
public static string CleanVoteLineBBCode(string line)
{
Match m;
string clean = line;
line = StripPrecontentBBCode(line);

line = NormalizeContentBBCode(line);

return line;
}

m = precontentRegex.Match(line);
/// <summary>
/// Remove all BBCode tags from the pre-content portion of a vote line.
/// </summary>
/// <param name="line">The vote line to modify.</param>
/// <returns>Returns the vote line without any BBCode in the pre-content area.</returns>
private static string StripPrecontentBBCode(string line)
{
// Remove BBCode markup one at a time, until we get a clear check
// across the entire precontent area. Any BBCode in the content
// area is left alone.
Match m = precontentRegex.Match(line);
while (m.Success == false)
{
string cleaned = markupRegex.Replace(clean, "", 1);
string cleaned = markupRegex.Replace(line, "", 1);

if (cleaned != clean)
if (cleaned != line)
{
clean = cleaned;
m = precontentRegex.Match(clean);
line = cleaned;
m = precontentRegex.Match(line);
}
else
{
break;
}
}

var opens = openBBCodeRegex.Matches(clean);
var closes = closeBBCodeRegex.Matches(clean);
return line;
}

var openGroups = opens.OfType<Match>().GroupBy(a => GetTag(a.Value));
var closeGroups = closes.OfType<Match>().GroupBy(a => GetTag(a.Value));
/// <summary>
/// Make sure all BBCode tags have appropriate matching start/end tags.
/// Any tags that don't have a proper match are removed. This includes having
/// a close tag before any opening tag (even if there's another open tag later on),
/// or an open tag that's not followed by an end tag (even if there was an end tag
/// earlier on).
/// </summary>
/// <param name="line">The vote line to modify.</param>
/// <returns>Returns a normalized version of the vote line, with proper matching BBCode tags.</returns>
private static string NormalizeContentBBCode(string line)
{
var lineSplit = allBBCodeRegex.Split(line);

// If there were no BBCode tags, just return the original line.
if (lineSplit.Length == 1)
return line;

// Run matches for opens and closes on all line splits, so we don't have to do it again later.
Match[] openMatches = new Match[lineSplit.Length];
Match[] closeMatches = new Match[lineSplit.Length];

foreach (var tag in closeGroups)
for (int i = 0; i < lineSplit.Length; i++)
{
var matchingOpen = openGroups.FirstOrDefault(a => a.Key == tag.Key);
// If there are no matching opens for the closing tag, remove all instances of the closing tag.
if (matchingOpen == null)
{
clean = closeTagRegexes[tag.Key].Replace(clean, "", tag.Count());
}
else
openMatches[i] = openBBCodeRegex.Match(lineSplit[i]);
closeMatches[i] = closeBBCodeRegex.Match(lineSplit[i]);
}

// Rebuild the result
StringBuilder sb = new StringBuilder(line.Length);
string tag;

// Reset counts
countTags["b"] = 0;
countTags["i"] = 0;
countTags["u"] = 0;
countTags["color"] = 0;


for (int i = 0; i < lineSplit.Length; i++)
{
// Skip blank entries from the split
if (lineSplit[i] == string.Empty)
continue;

if (openMatches[i].Success)
{
// Otherwise remove as many additional close tags as are found.
int diff = tag.Count() - matchingOpen.Count();
tag = openMatches[i].Groups[1].Value;

if (diff > 0)
for (int j = i + 1; j < lineSplit.Length; j++)
{
clean = closeTagRegexes[tag.Key].Replace(clean, "", diff);
if (lineSplit[j] == string.Empty)
continue;

if (closeMatches[j].Success && closeMatches[j].Groups[1].Value == tag)
{
if (countTags[tag] > 0)
{
countTags[tag]--;
}
else
{
// We've found a matching close tag. Add this open tag and end the loop.
sb.Append(lineSplit[i]);
break;
}
}
else if (openMatches[j].Success && openMatches[j].Groups[1].Value == tag)
{
countTags[tag]++;
}
}
}
}

foreach (var tag in openGroups)
{
var matchingClose = closeGroups.FirstOrDefault(a => a.Key == tag.Key);

int? diff = tag.Count() - matchingClose?.Count();
string closeTag = $"[/{tag.Key}]";
countTags[tag] = 0;
}
else if (closeMatches[i].Success)
{
tag = closeMatches[i].Groups[1].Value;

int num = diff.HasValue ? diff.Value : tag.Count();
for (int j = i - 1; j >= 0; j--)
{
if (lineSplit[j] == string.Empty)
continue;

if (openMatches[j].Success && openMatches[j].Groups[1].Value == tag)
{
if (countTags[tag] > 0)
{
countTags[tag]--;
}
else
{
// We've found a matching open tag. Add this close tag and end the loop.
sb.Append(lineSplit[i]);
break;
}
}
else if (closeMatches[j].Success && closeMatches[j].Groups[1].Value == tag)
{
countTags[tag]++;
}
}

for (int i = 0; i < num; i++)
countTags[tag] = 0;
}
else
{
clean += closeTag;
// This isn't a BBCode tag, so just add it to the pile.
sb.Append(lineSplit[i]);
}
}

return clean.Trim();
}

/// <summary>
/// Takes a BBCode opening or closing tag element, and extracts the tag from it.
/// </summary>
/// <param name="input">A BBCode markup element.</param>
/// <returns>Returns the BBCode tag from the element.</returns>
private static string GetTag(string input)
{
Match m = tagRegex.Match(input);
if (m.Success)
return m.Groups["tag"].Value;

return string.Empty;
return sb.ToString();
}
#endregion

Expand Down

0 comments on commit cb2f7ce

Please sign in to comment.