Add tag-consensus harassment/abuse tag note scoring + reputation filtering

- After the first-phase matrix factorization, compute a second matrix factorization that uses the harassment/abuse tag as the label instead of the usual overall helpfulness rating.
- Add a large rater-reputation penalty for raters who rated any note with an extremely high harassment/abuse score as helpful.
- Support BCEWithLogitsLoss and pos_weight so that the imbalanced binary matrix factorization trains well.
jbaxter committed Nov 14, 2023
1 parent eedb9c7 commit d42c4ad
Showing 8 changed files with 273 additions and 20 deletions.
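The heart of the change: after the first-phase helpfulness factorization, a second factorization is trained on the same rating matrix but with the "not helpful: spam, harassment or abuse" tag as its binary label. Notes with a high intercept in that model are treated as tag-consensus harassment/abuse notes, and raters who rated any of them helpful are penalized before the rater-reputation threshold is applied. Below is a minimal pandas sketch of just the penalty step, using toy data and made-up IDs rather than the repository's constants; the real logic lives in compute_general_helpfulness_scores in helpfulness_scores.py.

import pandas as pd

PENALTY, MIN_SCORE = 10, 2.5  # defaults introduced by this commit

# Toy ratings: helpfulNum == 1 means the rater marked the note helpful.
ratings = pd.DataFrame({
  "noteId": ["n1", "n1", "n2"],
  "raterParticipantId": ["r1", "r2", "r1"],
  "helpfulNum": [1, 0, 1],
})
# Toy output of the tag-consensus model: one harassment intercept per note.
abuseNotes = pd.DataFrame({"noteId": ["n1"], "harassmentNoteIntercept": [3.0]})

# Penalize only helpful ratings on notes whose harassment score clears the bar.
flagged = abuseNotes[abuseNotes["harassmentNoteIntercept"] >= MIN_SCORE]
badHelpfuls = ratings[ratings["helpfulNum"] == 1].merge(flagged, on="noteId")
penalty = badHelpfuls.groupby("raterParticipantId").size() * PENALTY
print(penalty)  # r1 -> 10; r2 rated n1 not-helpful and is unaffected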
9 changes: 9 additions & 0 deletions sourcecode/scoring/constants.py
@@ -60,6 +60,7 @@
# Timestamps
deletedNoteTombstonesLaunchTime = 1652918400000 # May 19, 2022 UTC
notMisleadingUILaunchTime = 1664755200000 # October 3, 2022 UTC
lastRatingTagsChangeTimeMillis = 1639699200000 # December 17, 2021 UTC
publicTSVTimeDelay = 172800000 # 48 hours

# Explanation Tags
@@ -138,6 +139,12 @@ def rater_factor_key(i):
groupNoteInterceptMinKey = "groupNoteInterceptMin"
groupRaterInterceptKey = "groupRaterIntercept"
groupRaterFactor1Key = "groupRaterFactor1"
# Harassment/Abuse Tag
harassmentNoteInterceptKey = "harassmentNoteIntercept"
harassmentNoteFactor1Key = "harassmentNoteFactor1"
harassmentRaterInterceptKey = "harassmentRaterIntercept"
harassmentRaterFactor1Key = "harassmentRaterFactor1"


# Ids and Indexes
noteIdKey = "noteId"
@@ -162,6 +169,8 @@ def rater_factor_key(i):
raterAgreeRatioKey = "raterAgreeRatio"
ratingAgreesWithNoteStatusKey = "ratingAgreesWithNoteStatus"
aboveHelpfulnessThresholdKey = "aboveHelpfulnessThreshold"
totalHelpfulHarassmentRatingsPenaltyKey = "totalHelpfulHarassmentPenalty"
raterAgreeRatioWithHarassmentAbusePenaltyKey = "raterAgreeRatioWithHarassmentAbusePenalty"

# Note Status Labels
currentlyRatedHelpful = "CURRENTLY_RATED_HELPFUL"
73 changes: 61 additions & 12 deletions sourcecode/scoring/helpfulness_scores.py
@@ -1,3 +1,5 @@
from typing import Optional

from . import constants as c

import numpy as np
@@ -70,6 +72,11 @@ def compute_general_helpfulness_scores(
  minMeanNoteScore: float,
  minCRHVsCRNHRatio: float,
  minRaterAgreeRatio: float,
  ratings: Optional[pd.DataFrame] = None,
  tagConsensusHarassmentAbuseNotes: Optional[pd.DataFrame] = None,
  tagConsensusHarassmentHelpfulRatingPenalty: int = 10,
  multiplyPenaltyByHarassmentScore: bool = False,
  minimumHarassmentScoreToPenalize: float = 2.5,
) -> pd.DataFrame:
"""Given notes scored by matrix factorization, compute helpfulness scores.
Author helpfulness scores are based on the scores of the notes you wrote.
@@ -84,6 +91,7 @@
      comparing how often an author produces CRH / CRNH notes. See author_helpfulness.
    minRaterAgreeRatio: minimum standard for how often a rater must predict the
      eventual outcome when rating before a note is assigned status.
    ratings: all ratings (to check if tag-consensus harassment/abuse notes were rated helpful)
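    tagConsensusHarassmentAbuseNotes: notes scored by the tag-consensus harassment/abuse model
    tagConsensusHarassmentHelpfulRatingPenalty: reputation penalty applied for each helpful
      rating on a note flagged by the tag-consensus model
    multiplyPenaltyByHarassmentScore: if True, scale the penalty by the note's harassment
      intercept relative to minimumHarassmentScoreToPenalize
    minimumHarassmentScoreToPenalize: minimum harassment intercept a note needs before its
      helpful ratings are penalized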
  Returns:
    helpfulness_scores pandas.DataFrame: 1 row per user, with helpfulness scores as columns.
  """
@@ -101,26 +109,67 @@
        c.crhCrnhRatioDifferenceKey,
        c.meanNoteScoreKey,
        c.raterAgreeRatioKey,
        c.ratingAgreesWithNoteStatusKey,
        c.ratingCountKey,
      ]
    ]
  )

  if (ratings is None) or (tagConsensusHarassmentAbuseNotes is None):
    helpfulnessScores[c.totalHelpfulHarassmentRatingsPenaltyKey] = 0
  else:
    filteredAbuseNotes = tagConsensusHarassmentAbuseNotes[
      tagConsensusHarassmentAbuseNotes[c.harassmentNoteInterceptKey]
      >= minimumHarassmentScoreToPenalize
    ]
    helpfulRatingsOnBadNotes = ratings[ratings[c.helpfulNumKey] == 1].merge(
      filteredAbuseNotes, on=c.noteIdKey
    )

    helpfulRatingsOnBadNotes[
      c.totalHelpfulHarassmentRatingsPenaltyKey
    ] = tagConsensusHarassmentHelpfulRatingPenalty
    if multiplyPenaltyByHarassmentScore:
      helpfulRatingsOnBadNotes[c.totalHelpfulHarassmentRatingsPenaltyKey] *= (
        helpfulRatingsOnBadNotes[c.harassmentNoteInterceptKey] / minimumHarassmentScoreToPenalize
      )

    helpfulRatingsOnBadNotesCount = (
      helpfulRatingsOnBadNotes.groupby(c.raterParticipantIdKey)
      .sum()[[c.totalHelpfulHarassmentRatingsPenaltyKey]]
      .reset_index()
    )
    helpfulnessScores = helpfulnessScores.merge(
      helpfulRatingsOnBadNotesCount, on=c.raterParticipantIdKey, how="left"
    )
    helpfulnessScores[c.totalHelpfulHarassmentRatingsPenaltyKey].fillna(0, inplace=True)

  helpfulnessScores[c.raterAgreeRatioWithHarassmentAbusePenaltyKey] = (
    helpfulnessScores[c.ratingAgreesWithNoteStatusKey]
    - helpfulnessScores[c.totalHelpfulHarassmentRatingsPenaltyKey]
  ) / helpfulnessScores[c.ratingCountKey]

  helpfulnessScores[c.aboveHelpfulnessThresholdKey] = (
    (
      (
        (helpfulnessScores[c.crhCrnhRatioDifferenceKey] >= minCRHVsCRNHRatio)
        & (helpfulnessScores[c.meanNoteScoreKey] >= minMeanNoteScore)
      )
      | (
        pd.isna(helpfulnessScores[c.crhCrnhRatioDifferenceKey])
        & pd.isna(helpfulnessScores[c.meanNoteScoreKey])
      )
      | (
        pd.isna(helpfulnessScores[c.crhCrnhRatioDifferenceKey])
        & (helpfulnessScores[c.meanNoteScoreKey] >= minMeanNoteScore)
      )
    )
    & (helpfulnessScores[c.raterAgreeRatioKey] >= minRaterAgreeRatio)
    & (helpfulnessScores[c.raterAgreeRatioWithHarassmentAbusePenaltyKey] >= minRaterAgreeRatio)
  )

  helpfulnessScores.drop(columns=[c.ratingCountKey, c.ratingAgreesWithNoteStatusKey], inplace=True)
return helpfulnessScores


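In other words, the new gate is raterAgreeRatioWithHarassmentAbusePenalty = (ratingAgreesWithNoteStatus - totalHelpfulHarassmentRatingsPenalty) / ratingCount, checked against the same minRaterAgreeRatio as the unpenalized ratio. For intuition, with the default penalty of 10: a rater with 100 ratings, 75 of which agree with the eventual note status, has raterAgreeRatio = 0.75; a single helpful rating on a note with harassmentNoteIntercept >= 2.5 drops the penalized ratio to (75 - 10) / 100 = 0.65, enough to fail a minRaterAgreeRatio of, say, 0.66 even though the unpenalized ratio still passes. (These numbers are illustrative, not taken from the commit.)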
22 changes: 20 additions & 2 deletions sourcecode/scoring/matrix_factorization/matrix_factorization.py
@@ -30,6 +30,8 @@ def __init__(
    model: Optional[BiasedMatrixFactorization] = None,
    featureCols: List[str] = [c.noteIdKey, c.raterParticipantIdKey],
    labelCol: str = c.helpfulNumKey,
    useSigmoidCrossEntropy: bool = False,
    posWeight: Optional[float] = None,
  ) -> None:
"""Configure matrix factorization note ranking."""
self._l2_lambda = l2_lambda
@@ -43,8 +45,25 @@
    self._flipFactorsForIdentification = flipFactorsForIdentification
    self._featureCols = featureCols
    self._labelCol = labelCol
    self._useSigmoidCrossEntropy = useSigmoidCrossEntropy
    self._posWeight = posWeight

    if self._useSigmoidCrossEntropy:
      if self._posWeight:
        if logging:
          print(f"Using pos weight: {self._posWeight} with BCEWithLogitsLoss")
        self.criterion = torch.nn.BCEWithLogitsLoss(
          pos_weight=torch.Tensor(np.array(self._posWeight))
        )
      else:
        if logging:
          print("Using BCEWithLogitsLoss")
        self.criterion = torch.nn.BCEWithLogitsLoss()
    else:
      if self._posWeight:
        raise ValueError("posWeight is not supported for MSELoss")
      self.criterion = torch.nn.MSELoss()

    self.train_errors: List[float] = []
    self.test_errors: List[float] = []
    self.mf_model = model
@@ -340,7 +359,6 @@ def _fit_model(
    while (abs(loss.item() - prev_loss) > self._convergence) and (
      not (epoch > 100 and loss.item() > prev_loss)
    ):
      prev_loss = loss.item()

      # Backpropagate
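For context on the new loss option: BCEWithLogitsLoss fuses a sigmoid with binary cross-entropy, and its pos_weight argument up-weights the loss on positive examples, which matters here because ratings carrying the harassment/abuse tag are rare. A self-contained illustration of the effect, separate from the scorer code:

import torch

# One positive among eight examples: a heavily imbalanced toy batch.
logits = torch.tensor([0.5, -1.0, -2.0, -3.0, -1.5, -2.5, -0.5, -1.0])
labels = torch.tensor([1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])

plain = torch.nn.BCEWithLogitsLoss()
# A common heuristic is pos_weight = (#negatives / #positives).
weighted = torch.nn.BCEWithLogitsLoss(pos_weight=torch.tensor([7.0]))

print(plain(logits, labels))     # the lone positive barely moves the mean loss
print(weighted(logits, labels))  # its error now counts 7x, as if the classes were balanced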
71 changes: 69 additions & 2 deletions sourcecode/scoring/mf_base_scorer.py
@@ -1,6 +1,6 @@
from typing import List, Optional, Tuple

from . import constants as c, helpfulness_scores, note_ratings, process_data, tag_consensus
from .matrix_factorization.matrix_factorization import MatrixFactorization
from .matrix_factorization.pseudo_raters import PseudoRatersRunner
from .scorer import Scorer
@@ -36,6 +36,7 @@ def __init__(
    crhSuperThreshold: float = 0.5,
    inertiaDelta: float = 0.01,
    useStableInitialization: bool = True,
    saveIntermediateState: bool = False,
  ):
    """Configure MatrixFactorizationScorer object.
@@ -88,6 +89,7 @@ def __init__(
    self._crhSuperThreshold = crhSuperThreshold
    self._inertiaDelta = inertiaDelta
    self._modelingGroupToInitializeForStability = 13 if useStableInitialization else None
    self._saveIntermediateState = saveIntermediateState
    self._mfRanker = MatrixFactorization()

  def get_crh_threshold(self) -> float:
@@ -121,7 +123,10 @@ def get_helpfulness_scores_cols(self) -> List[str]:

  def get_auxiliary_note_info_cols(self) -> List[str]:
    """Returns a list of columns which should be present in the auxiliaryNoteInfo output."""
    return [
      c.noteIdKey,
      c.ratingWeightKey,
    ] + (
      c.notHelpfulTagsAdjustedColumns
      + c.notHelpfulTagsAdjustedRatioColumns
      + c.incorrectFilterColumns
@@ -281,11 +286,17 @@ def _score_notes_and_users(
    # Removes ratings where either (1) the note did not receive enough ratings, or
    # (2) the rater did not rate enough notes.
    ratingsForTraining = self._prepare_data_for_scoring(ratings)
    if self._saveIntermediateState:
      self.ratingsForTraining = ratingsForTraining

    # TODO: Save parameters from this first run in note_model_output next time we add extra fields to model output TSV.
    noteParamsUnfiltered, raterParamsUnfiltered, globalBias = self._run_stable_matrix_factorization(
      ratingsForTraining, userEnrollmentRaw
    )
    if self._saveIntermediateState:
      self.noteParamsUnfiltered = noteParamsUnfiltered
      self.raterParamsUnfiltered = raterParamsUnfiltered
      self.globalBias = globalBias

    # Get a dataframe of scored notes based on the algorithm results above
    scoredNotes = note_ratings.compute_scored_notes(
@@ -303,6 +314,8 @@
      crhSuperThreshold=self._crhSuperThreshold,
      inertiaDelta=self._inertiaDelta,
    )
    if self._saveIntermediateState:
      self.firstRoundScoredNotes = scoredNotes

    # Determine "valid" ratings
    validRatings = note_ratings.get_valid_ratings(
@@ -317,9 +330,48 @@
        ]
      ],
    )
    if self._saveIntermediateState:
      self.validRatings = validRatings

    # Assigns contributor (author & rater) helpfulness bit based on (1) performance
    # authoring and reviewing previous and current notes.
    helpfulnessScoresPreHarassmentFilter = helpfulness_scores.compute_general_helpfulness_scores(
      scoredNotes[
        [
          c.noteAuthorParticipantIdKey,
          c.currentlyRatedHelpfulBoolKey,
          c.currentlyRatedNotHelpfulBoolKey,
          c.internalNoteInterceptKey,
        ]
      ],
      validRatings,
      self._minMeanNoteScore,
      self._minCRHVsCRNHRatio,
      self._minRaterAgreeRatio,
      ratingsForTraining,
    )
    if self._saveIntermediateState:
      self.firstRoundHelpfulnessScores = helpfulnessScoresPreHarassmentFilter

    # Filters ratings matrix to include only rows (ratings) where the rater was
    # considered helpful.
    ratingsHelpfulnessScoreFilteredPreHarassmentFilter = (
      helpfulness_scores.filter_ratings_by_helpfulness_scores(
        ratingsForTraining, helpfulnessScoresPreHarassmentFilter
      )
    )
    if self._saveIntermediateState:
      self.ratingsHelpfulnessScoreFilteredPreHarassmentFilter = (
        ratingsHelpfulnessScoreFilteredPreHarassmentFilter
      )

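    # Train a second, tag-consensus matrix factorization on the filtered ratings, with the
    # spam/harassment/abuse tag (rather than overall helpfulness) as the binary label.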
    harassmentAbuseNoteParams, _, _ = tag_consensus.train_tag_model(
      ratingsHelpfulnessScoreFilteredPreHarassmentFilter, c.notHelpfulSpamHarassmentOrAbuseTagKey
    )

    # Assigns contributor (author & rater) helpfulness bit based on (1) performance
    # authoring and reviewing previous and current notes, and (2) including an extra
    # penalty for rating a harassment/abuse note as helpful.
    helpfulnessScores = helpfulness_scores.compute_general_helpfulness_scores(
      scoredNotes[
        [
@@ -333,13 +385,19 @@
      self._minMeanNoteScore,
      self._minCRHVsCRNHRatio,
      self._minRaterAgreeRatio,
      ratings=ratingsForTraining,
      tagConsensusHarassmentAbuseNotes=harassmentAbuseNoteParams,
    )
    if self._saveIntermediateState:
      self.secondRoundHelpfulnessScores = helpfulnessScores

    # Filters ratings matrix to include only rows (ratings) where the rater was
    # considered helpful.
    ratingsHelpfulnessScoreFiltered = helpfulness_scores.filter_ratings_by_helpfulness_scores(
      ratingsForTraining, helpfulnessScores
    )
    if self._saveIntermediateState:
      self.ratingsHelpfulnessScoreFiltered = ratingsHelpfulnessScoreFiltered

    # Re-runs matrix factorization using only ratings given by helpful raters.
    noteParams, raterParams, globalBias = self._mfRanker.run_mf(
@@ -357,6 +415,10 @@
    else:
      for col in c.noteParameterUncertaintyTSVColumns:
        noteParams[col] = np.nan
    if self._saveIntermediateState:
      self.noteParams = noteParams
      self.raterParams = raterParams
      self.globalBias = globalBias

    # Assigns updated CRH / CRNH bits to notes based on volume of prior ratings
    # and ML output.
@@ -376,6 +438,7 @@
      inertiaDelta=self._inertiaDelta,
      finalRound=True,
    )

    # Takes raterParams from most recent MF run, but use the pre-computed
    # helpfulness scores.
    helpfulnessScores = raterParams.merge(
@@ -392,4 +455,8 @@
      how="outer",
    )

    if self._saveIntermediateState:
      self.scoredNotes = scoredNotes
      self.helpfulnessScores = helpfulnessScores

    return scoredNotes, helpfulnessScores
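Condensed, _score_notes_and_users now runs in this order (a paraphrase of the calls above with names shortened, not code from the commit):

ratingsForTraining = prepare_data_for_scoring(ratings)
noteParams, raterParams, bias = run_stable_matrix_factorization(ratingsForTraining)
scoredNotes = compute_scored_notes(...)                  # first-round statuses
validRatings = get_valid_ratings(...)
prelimScores = compute_general_helpfulness_scores(...)   # no harassment penalty yet
prelimRatings = filter_ratings_by_helpfulness_scores(ratingsForTraining, prelimScores)
harassmentNoteParams = train_tag_model(prelimRatings)    # tag-consensus factorization
finalScores = compute_general_helpfulness_scores(..., ratings=ratingsForTraining,
                                                 tagConsensusHarassmentAbuseNotes=harassmentNoteParams)
finalRatings = filter_ratings_by_helpfulness_scores(ratingsForTraining, finalScores)
noteParams, raterParams, bias = run_mf(finalRatings)     # final factorization
scoredNotes = compute_scored_notes(..., finalRound=True)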
8 changes: 7 additions & 1 deletion sourcecode/scoring/mf_core_scorer.py
@@ -19,6 +19,7 @@ def __init__(
    pseudoraters: Optional[bool] = False,
    core_threshold: float = 0.5,
    useStableInitialization: bool = True,
    saveIntermediateState: bool = False,
  ) -> None:
    """Configure MFCoreScorer object.
@@ -28,7 +29,12 @@
      core_threshold: float specifying the fraction of reviews which must be from CORE users
        for a note to be in scope for the CORE model.
    """
    super().__init__(
      seed,
      pseudoraters,
      useStableInitialization=useStableInitialization,
      saveIntermediateState=saveIntermediateState,
    )
    self._core_threshold = core_threshold

  def _get_note_col_mapping(self) -> Dict[str, str]:
14 changes: 12 additions & 2 deletions sourcecode/scoring/mf_expansion_scorer.py
@@ -5,13 +5,23 @@


class MFExpansionScorer(MFBaseScorer):
  def __init__(
    self,
    seed: Optional[int] = None,
    useStableInitialization: bool = True,
    saveIntermediateState: bool = False,
  ) -> None:
    """Configure MFExpansionScorer object.
    Args:
      seed: if not None, seed value to ensure deterministic execution
    """
    super().__init__(
      seed,
      pseudoraters=False,
      useStableInitialization=useStableInitialization,
      saveIntermediateState=saveIntermediateState,
    )

  def _get_note_col_mapping(self) -> Dict[str, str]:
    """Returns a dict mapping default note column names to custom names for a specific model."""
