Skip to content

Commit

Permalink
Better highlighting matched samples
Browse files Browse the repository at this point in the history
-Also now shows message explaining red highlights.
  • Loading branch information
ThioJoe committed Jan 1, 2024
1 parent b22d56e commit 0229f37
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 10 deletions.
28 changes: 19 additions & 9 deletions Scripts/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,18 @@ def print_comments(current, config, scanVideoID, loggingEnabled, scanMode, logMo
write_plaintext_log(current.logFileName, commentsContents)
print(" ")

# Check if any flagged as possible false positives
# Check if any flagged as possible false positives or any matched from spam lists
possibleFalsePositive = False
for author in current.matchSamplesDict.values():
if author['possibleFalsePositive'] == True:
knownSpamListMatch = False
for sample in current.matchSamplesDict.values():
if sample['possibleFalsePositive'] == True:
possibleFalsePositive = True
break


for sample in current.matchSamplesDict.values():
if sample['nameAndTextColorized'] is not None:
knownSpamListMatch = True
break

# Print Sample Match List
valuesPreparedToWrite = ""
valuesPreparedToPrint = ""
Expand Down Expand Up @@ -112,6 +116,8 @@ def print_and_write(value, writeValues, printValues):
print(f"{F.LIGHTMAGENTA_EX}============================ Match Samples: One comment per matched-comment author ============================{S.R}")
if possibleFalsePositive:
print(f"{F.GREEN}======= {B.GREEN}{F.BLACK} NOTE: {S.R}{F.GREEN} Possible false positives marked with * and highlighted in green. Check them extra well! ======={S.R}")
if knownSpamListMatch:
print(f"{F.RED}*NOTE: Specific matches from known spam lists are highlighted in red.{S.R}")
for value in current.matchSamplesDict.values():
if value['matchReason'] != "Duplicate" and value['matchReason'] != "Spam Bot Thread" and value['matchReason'] != "Repost":
valuesPreparedToWrite, valuesPreparedToPrint = print_and_write(value, valuesPreparedToWrite, valuesPreparedToPrint)
Expand Down Expand Up @@ -745,10 +751,14 @@ def download_profile_pictures(pictureUrlsDict, jsonSettingsDict):
# -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Colorize the matched text within a string using colorama. Matches using regex to account for case insensitivity
def colorize_text(originalString, matchedText, color):
    """Highlight every case-insensitive occurrence of matchedText in originalString.

    Each occurrence is wrapped as ``{color}<matched text>{S.R}``. The text
    re-inserted is the text actually found in originalString, so its original
    casing is preserved even when matchedText differs in case.

    Args:
        originalString: The string to search and colorize.
        matchedText: Literal text to highlight (escaped before use as a regex).
            If empty or None, originalString is returned unchanged.
        color: Prefix inserted before each occurrence (presumably a colorama
            color code -- confirm against callers).

    Returns:
        The string with every occurrence wrapped in color / S.R.
    """
    # Nothing to highlight. An empty pattern would match at every position and
    # scatter color codes between all characters; None would make re.escape raise.
    if not matchedText:
        return originalString

    escapedMatchedText = re.escape(matchedText)

    def replace_with_color(match):
        # Use match.group(0) rather than matchedText so the casing found in
        # originalString is kept.
        return f"{color}{match.group(0)}{S.R}"

    return re.sub(escapedMatchedText, replace_with_color, originalString, flags=re.I)


# Adds a sample to current.matchSamplesDict and preps formatting
def add_sample(current, authorID, authorNameRaw, commentText, matchReason, matchedText, longestAuthorNameLength):
Expand Down
2 changes: 1 addition & 1 deletion Scripts/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -976,7 +976,7 @@ def multiVarDetect(text, username):
elif any(findObf(expressionPair[0], expressionPair[1], authorChannelName) for expressionPair in compiledObfuRegexDict['usernameObfuBlackWords']):
add_spam(current, config, miscData, currentCommentDict, videoID)
# Simultaneously checks elif statement and assigns matchedText variable using walrus operator
elif (matchedText := spamListCombinedRegex.search(combinedStringNormalized.lower())) is not None:
elif (matchedText := spamListCombinedRegex.search(combinedStringNormalized)) is not None: # Used to do .lower() but took it out to be able to use matched text later
add_spam(current, config, miscData, currentCommentDict, videoID, matchedText=matchedText.group(0))
elif config['detect_link_spam'] and check_if_only_link(commentTextNormalized.strip()):
add_spam(current, config, miscData, currentCommentDict, videoID)
Expand Down

0 comments on commit 0229f37

Please sign in to comment.