From 0229f37ec69d32353d3ad960ee4f4f20656ba771 Mon Sep 17 00:00:00 2001
From: ThioJoe <12518330+ThioJoe@users.noreply.github.com>
Date: Mon, 1 Jan 2024 15:40:39 -0700
Subject: [PATCH] Better highlighting of matched samples

- Also now shows a message explaining red highlights.
---
 Scripts/logging.py    | 28 +++++++++++++++++++---------
 Scripts/operations.py |  2 +-
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/Scripts/logging.py b/Scripts/logging.py
index b979845e..40f53c00 100644
--- a/Scripts/logging.py
+++ b/Scripts/logging.py
@@ -45,14 +45,18 @@ def print_comments(current, config, scanVideoID, loggingEnabled, scanMode, logMo
       write_plaintext_log(current.logFileName, commentsContents)
     print("                                             ")
 
-  # Check if any flagged as possible false positives
+  # Check if any flagged as possible false positives or any matched from spam lists
   possibleFalsePositive = False
-  for author in current.matchSamplesDict.values():
-    if author['possibleFalsePositive'] == True:
+  knownSpamListMatch = False
+  for sample in current.matchSamplesDict.values():
+    if sample['possibleFalsePositive'] == True:
       possibleFalsePositive = True
       break
-    
-
+  for sample in current.matchSamplesDict.values():
+    if sample['nameAndTextColorized'] is not None:
+      knownSpamListMatch = True
+      break
+  
   # Print Sample Match List
   valuesPreparedToWrite = ""
   valuesPreparedToPrint = ""
@@ -112,6 +116,8 @@ def print_and_write(value, writeValues, printValues):
     print(f"{F.LIGHTMAGENTA_EX}============================ Match Samples: One comment per matched-comment author ============================{S.R}")
     if possibleFalsePositive:
       print(f"{F.GREEN}======= {B.GREEN}{F.BLACK} NOTE: {S.R}{F.GREEN} Possible false positives marked with * and highlighted in green. Check them extra well! ======={S.R}")
+    if knownSpamListMatch:
+      print(f"{F.RED}*NOTE: Specific matches from known spam lists are highlighted in red.{S.R}")
   for value in current.matchSamplesDict.values():
     if value['matchReason'] != "Duplicate" and value['matchReason'] != "Spam Bot Thread" and value['matchReason'] != "Repost":
       valuesPreparedToWrite, valuesPreparedToPrint = print_and_write(value, valuesPreparedToWrite, valuesPreparedToPrint)
@@ -745,10 +751,14 @@ def download_profile_pictures(pictureUrlsDict, jsonSettingsDict):
 # -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 # Colorize the matched text within a string using colorama. Matches using regex to account for case insensitivity
 def colorize_text(originalString, matchedText, color):
-  escapedMatchedText = re.escape(matchedText)
-  colorizedString = re.sub(escapedMatchedText, f"{color}{matchedText}{S.R}", originalString, flags=re.I)
-  # colorizedString = originalString.replace(matchedText, f"{color}{matchedText}{S.R}")
-  return colorizedString
+    escapedMatchedText = re.escape(matchedText)
+
+    def replace_with_color(match):
+        return f"{color}{match.group(0)}{S.R}"
+
+    colorizedString = re.sub(escapedMatchedText, replace_with_color, originalString, flags=re.I)
+    return colorizedString
+  
 
 # Adds a sample to current.matchSamplesDict and preps formatting
 def add_sample(current, authorID, authorNameRaw, commentText, matchReason, matchedText, longestAuthorNameLength):
diff --git a/Scripts/operations.py b/Scripts/operations.py
index dda42276..aea58757 100644
--- a/Scripts/operations.py
+++ b/Scripts/operations.py
@@ -976,7 +976,7 @@ def multiVarDetect(text, username):
       elif any(findObf(expressionPair[0], expressionPair[1], authorChannelName) for expressionPair in compiledObfuRegexDict['usernameObfuBlackWords']):  
         add_spam(current, config, miscData, currentCommentDict, videoID)
       # Simultaneously checks elif statement and assigns matchedText variable using walrus operator
-      elif (matchedText := spamListCombinedRegex.search(combinedStringNormalized.lower())) is not None:
+      elif (matchedText := spamListCombinedRegex.search(combinedStringNormalized)) is not None: # Used to do .lower() but took it out to be able to use matched text later
         add_spam(current, config, miscData, currentCommentDict, videoID, matchedText=matchedText.group(0))
       elif config['detect_link_spam'] and check_if_only_link(commentTextNormalized.strip()):
         add_spam(current, config, miscData, currentCommentDict, videoID)