From abf89146c67d3120056f93ef09fbd1e2520f5b41 Mon Sep 17 00:00:00 2001
From: Jay Baxter
Date: Fri, 23 Feb 2024 19:33:46 -0800
Subject: [PATCH] Increase harassment-abuse rater penalty, and update user enrollment

1. Raters who rate a note with a high harassment-abuse tag-consensus model
   note intercept will get an increased penalty to their raterAgreeRatio.
2. Work-in-progress changes to user enrollment: treat earnedOutNoAcknowledge
   the same as earnedOutAcknowledged going forward, and start counting the
   number of times each contributor has earned out.
3. Re-calibrate the low-diligence threshold so that it catches the same
   percentage of notes as it did prior to the harassment-abuse change.
---
 sourcecode/scoring/constants.py          |  8 ++++++
 sourcecode/scoring/contributor_state.py  | 23 +++++++++++++++---
 sourcecode/scoring/helpfulness_scores.py |  4 +--
 sourcecode/scoring/mf_base_scorer.py     | 12 ++++++++-
 sourcecode/scoring/mf_group_scorer.py    |  8 +++++-
 sourcecode/scoring/note_ratings.py       |  2 +-
 sourcecode/scoring/process_data.py       | 31 +++++++++++++++++-------
 sourcecode/scoring/run_scoring.py        |  4 +++
 sourcecode/scoring/tag_consensus.py      |  6 ++++-
 9 files changed, 80 insertions(+), 18 deletions(-)

diff --git a/sourcecode/scoring/constants.py b/sourcecode/scoring/constants.py
index 6d6106b4..86c144e8 100644
--- a/sourcecode/scoring/constants.py
+++ b/sourcecode/scoring/constants.py
@@ -47,6 +47,7 @@ authorTopNotHelpfulTagValues = "authorTopNotHelpfulTagValues"
 modelingPopulationKey = "modelingPopulation"
 modelingGroupKey = "modelingGroup"
+numberOfTimesEarnedOutKey = "numberOfTimesEarnedOut"
 
 # TSV Values
 notHelpfulValueTsv = "NOT_HELPFUL"
@@ -416,10 +417,16 @@ def rater_factor_key(i):
   (timestampOfLastEarnOut, np.double),  # double because nullable.
   (modelingPopulationKey, str),
   (modelingGroupKey, np.float64),
+  (numberOfTimesEarnedOutKey, np.int64),
 ]
 userEnrollmentTSVColumns = [col for (col, _) in userEnrollmentTSVColumnsAndTypes]
 userEnrollmentTSVTypes = [dtype for (_, dtype) in userEnrollmentTSVColumnsAndTypes]
 userEnrollmentTSVTypeMapping = {col: dtype for (col, dtype) in userEnrollmentTSVColumnsAndTypes}
+# TODO: Remove the "old" user enrollment schemas below once numberOfTimesEarnedOut is in production
+userEnrollmentTSVColumnsOld = [col for (col, _) in userEnrollmentTSVColumnsAndTypes[:7]]
+userEnrollmentTSVTypeMappingOld = {
+  col: dtype for (col, dtype) in userEnrollmentTSVColumnsAndTypes[:7]
+}
 
 noteInterceptMaxKey = "internalNoteIntercept_max"
 noteInterceptMinKey = "internalNoteIntercept_min"
@@ -564,6 +571,7 @@ def rater_factor_key(i):
   (groupRaterFactor1Key, np.double),
   (modelingGroupKey, np.float64),
   (raterHelpfulnessReputationKey, np.double),
+  (numberOfTimesEarnedOutKey, np.int64),
 ]
 raterModelOutputTSVColumns = [col for (col, dtype) in raterModelOutputTSVColumnsAndTypes]
 raterModelOutputTSVTypeMapping = {col: dtype for (col, dtype) in raterModelOutputTSVColumnsAndTypes}
diff --git a/sourcecode/scoring/contributor_state.py b/sourcecode/scoring/contributor_state.py
index ab59624f..1bc88ad7 100644
--- a/sourcecode/scoring/contributor_state.py
+++ b/sourcecode/scoring/contributor_state.py
@@ -56,7 +56,6 @@ def is_earned_out(authorEnrollmentCounts: pd.DataFrame):
   return (
     (authorEnrollmentCounts[c.enrollmentState] != c.newUser)
     & (authorEnrollmentCounts[c.enrollmentState] != c.earnedOutAcknowledged)
-    & (authorEnrollmentCounts[c.enrollmentState] != c.earnedOutNoAcknowledge)
     & (authorEnrollmentCounts[c.notesCurrentlyRatedNotHelpful] > c.isAtRiskCRNHCount)
   )
 
@@ -401,9 +400,27 @@ def get_contributor_state(
   contributorScoresWithEnrollment.loc[
     is_at_risk(contributorScoresWithEnrollment), c.enrollmentState
   ] = c.enrollmentStateToThrift[c.atRisk]
+
+  # for earned out users, first increment the number of times they have earned out,
+  # use this to overwrite successful rating needed to earn in,
+  # then set new state
+  earnedOutUsers = is_earned_out(contributorScoresWithEnrollment)
+  contributorScoresWithEnrollment.loc[earnedOutUsers, c.numberOfTimesEarnedOutKey] = (
+    contributorScoresWithEnrollment.loc[earnedOutUsers, c.numberOfTimesEarnedOutKey] + 1
+  )
+
+  contributorScoresWithEnrollment.loc[
+    earnedOutUsers, c.successfulRatingNeededToEarnIn
+  ] = contributorScoresWithEnrollment.loc[earnedOutUsers].apply(
+    lambda row: c.ratingImpactForEarnIn
+    + max([row[c.ratingImpact], 0])
+    + (c.ratingImpactForEarnIn * row[c.numberOfTimesEarnedOutKey]),
+    axis=1,
+  )
+
   contributorScoresWithEnrollment.loc[
-    is_earned_out(contributorScoresWithEnrollment), c.enrollmentState
-  ] = c.enrollmentStateToThrift[c.earnedOutNoAcknowledge]
+    earnedOutUsers, c.enrollmentState
+  ] = c.enrollmentStateToThrift[c.earnedOutAcknowledged]
 
   contributorScoresWithEnrollment.loc[
     is_earned_in(contributorScoresWithEnrollment), c.enrollmentState
diff --git a/sourcecode/scoring/helpfulness_scores.py b/sourcecode/scoring/helpfulness_scores.py
index 018be6a1..d5ea9262 100644
--- a/sourcecode/scoring/helpfulness_scores.py
+++ b/sourcecode/scoring/helpfulness_scores.py
@@ -75,8 +75,8 @@ def compute_general_helpfulness_scores(
   ratings: Optional[pd.DataFrame] = None,
   tagConsensusHarassmentAbuseNotes: Optional[pd.DataFrame] = None,
   tagConsensusHarassmentHelpfulRatingPenalty=10,
-  multiplyPenaltyByHarassmentScore: bool = False,
-  minimumHarassmentScoreToPenalize: float = 2.5,
+  multiplyPenaltyByHarassmentScore: bool = True,
+  minimumHarassmentScoreToPenalize: float = 2.0,
 ) -> pd.DataFrame:
   """Given notes scored by matrix factorization, compute helpfulness scores.
   Author helpfulness scores are based on the scores of the notes you wrote.
diff --git a/sourcecode/scoring/mf_base_scorer.py b/sourcecode/scoring/mf_base_scorer.py
index c2448a29..0db9ac34 100644
--- a/sourcecode/scoring/mf_base_scorer.py
+++ b/sourcecode/scoring/mf_base_scorer.py
@@ -102,7 +102,7 @@ def __init__(
     crnhThresholdNMIntercept: float = -0.15,
     crnhThresholdUCBIntercept: float = -0.04,
     crhSuperThreshold: float = 0.5,
-    lowDiligenceThreshold: float = 0.217,
+    lowDiligenceThreshold: float = 0.263,
     factorThreshold: float = 0.5,
     inertiaDelta: float = 0.01,
     useStableInitialization: bool = True,
@@ -117,6 +117,9 @@ def __init__(
     globalInterceptLambda=None,
     diamondLambda=None,
     normalizedLossHyperparameters=None,
+    multiplyPenaltyByHarassmentScore: bool = True,
+    minimumHarassmentScoreToPenalize: float = 2.0,
+    tagConsensusHarassmentHelpfulRatingPenalty: int = 10,
   ):
     """Configure MatrixFactorizationScorer object.
@@ -174,6 +177,9 @@ def __init__(
     self._maxFinalMFTrainError = maxFinalMFTrainError
     self._lowDiligenceThreshold = lowDiligenceThreshold
     self._factorThreshold = factorThreshold
+    self.multiplyPenaltyByHarassmentScore = multiplyPenaltyByHarassmentScore
+    self.minimumHarassmentScoreToPenalize = minimumHarassmentScoreToPenalize
+    self.tagConsensusHarassmentHelpfulRatingPenalty = tagConsensusHarassmentHelpfulRatingPenalty
     mfArgs = dict(
       [
         pair
@@ -460,6 +466,7 @@ def _score_notes_and_users(
       c.notHelpfulSpamHarassmentOrAbuseTagKey,
       noteParamsUnfiltered,
       raterParamsUnfiltered,
+      name="harassment",
     )
 
     # Assigns contributor (author & rater) helpfulness bit based on (1) performance
@@ -481,6 +488,9 @@ def _score_notes_and_users(
       self._minRaterAgreeRatio,
       ratings=ratingsForTraining,
       tagConsensusHarassmentAbuseNotes=harassmentAbuseNoteParams,
+      tagConsensusHarassmentHelpfulRatingPenalty=self.tagConsensusHarassmentHelpfulRatingPenalty,
+      multiplyPenaltyByHarassmentScore=self.multiplyPenaltyByHarassmentScore,
+      minimumHarassmentScoreToPenalize=self.minimumHarassmentScoreToPenalize,
     )
 
     # Filters ratings matrix to include only rows (ratings) where the rater was
diff --git a/sourcecode/scoring/mf_group_scorer.py b/sourcecode/scoring/mf_group_scorer.py
index 6a34533b..22b94859 100644
--- a/sourcecode/scoring/mf_group_scorer.py
+++ b/sourcecode/scoring/mf_group_scorer.py
@@ -119,8 +119,11 @@ def __init__(
     crnhThresholdNoteFactorMultiplier: float = -0.8,
     crnhThresholdNMIntercept: float = -0.15,
     crhSuperThreshold: float = 0.5,
-    lowDiligenceThreshold: float = 0.217,
+    lowDiligenceThreshold: float = 0.263,
     factorThreshold: float = 0.5,
+    multiplyPenaltyByHarassmentScore: bool = True,
+    minimumHarassmentScoreToPenalize: float = 2.0,
+    tagConsensusHarassmentHelpfulRatingPenalty: int = 10,
   ) -> None:
     """Configure MFGroupScorer object.
 
@@ -161,6 +164,9 @@ def __init__(
       crhSuperThreshold=crhSuperThreshold,
       lowDiligenceThreshold=lowDiligenceThreshold,
       factorThreshold=factorThreshold,
+      multiplyPenaltyByHarassmentScore=multiplyPenaltyByHarassmentScore,
+      minimumHarassmentScoreToPenalize=minimumHarassmentScoreToPenalize,
+      tagConsensusHarassmentHelpfulRatingPenalty=tagConsensusHarassmentHelpfulRatingPenalty,
     )
     assert groupNumber > 0, "groupNumber must be positive. 0 is reserved for unassigned."
     assert groupNumber <= groupScorerCount, "groupNumber exceeds maximum expected groups."
diff --git a/sourcecode/scoring/note_ratings.py b/sourcecode/scoring/note_ratings.py
index 5f71f8e3..922e4aea 100644
--- a/sourcecode/scoring/note_ratings.py
+++ b/sourcecode/scoring/note_ratings.py
@@ -366,7 +366,7 @@ def compute_scored_notes(
   is_crh_function: Callable[..., pd.Series] = is_crh,
   is_crnh_diamond_function: Callable[..., pd.Series] = is_crnh_diamond,
   is_crnh_ucb_function: Callable[..., pd.Series] = is_crnh_ucb,
-  lowDiligenceThreshold: float = 0.217,
+  lowDiligenceThreshold: float = 0.263,
   factorThreshold: float = 0.5,
 ) -> pd.DataFrame:
   """
diff --git a/sourcecode/scoring/process_data.py b/sourcecode/scoring/process_data.py
index a3a2d3b6..dca467fd 100644
--- a/sourcecode/scoring/process_data.py
+++ b/sourcecode/scoring/process_data.py
@@ -147,15 +147,28 @@ def read_from_tsv(
   if userEnrollmentPath is None:
     userEnrollment = None
   else:
-    userEnrollment = tsv_reader(
-      userEnrollmentPath, c.userEnrollmentTSVTypeMapping, c.userEnrollmentTSVColumns, header=headers
-    )
-    assert len(userEnrollment.columns.values) == len(c.userEnrollmentTSVColumns) and all(
-      userEnrollment.columns == c.userEnrollmentTSVColumns
-    ), (
-      f"userEnrollment columns don't match: \n{[col for col in userEnrollment.columns if not col in c.userEnrollmentTSVColumns]} are extra columns, "
-      + f"\n{[col for col in c.userEnrollmentTSVColumns if not col in userEnrollment.columns]} are missing."
-    )
+    try:
+      userEnrollment = tsv_reader(
+        userEnrollmentPath,
+        c.userEnrollmentTSVTypeMapping,
+        c.userEnrollmentTSVColumns,
+        header=headers,
+      )
+      assert len(userEnrollment.columns.values) == len(c.userEnrollmentTSVColumns) and all(
+        userEnrollment.columns == c.userEnrollmentTSVColumns
+      ), (
+        f"userEnrollment columns don't match: \n{[col for col in userEnrollment.columns if not col in c.userEnrollmentTSVColumns]} are extra columns, "
+        + f"\n{[col for col in c.userEnrollmentTSVColumns if not col in userEnrollment.columns]} are missing."
+      )
+    except ValueError:
+      # TODO: clean up fallback for old mappings once numberOfTimesEarnedOut column is in production
+      userEnrollment = tsv_reader(
+        userEnrollmentPath,
+        c.userEnrollmentTSVTypeMappingOld,
+        c.userEnrollmentTSVColumnsOld,
+        header=headers,
+      )
+      userEnrollment[c.numberOfTimesEarnedOutKey] = 0
 
   return notes, ratings, noteStatusHistory, userEnrollment
diff --git a/sourcecode/scoring/run_scoring.py b/sourcecode/scoring/run_scoring.py
index 2ee8dadb..94e9fac7 100644
--- a/sourcecode/scoring/run_scoring.py
+++ b/sourcecode/scoring/run_scoring.py
@@ -104,6 +104,9 @@ def _get_scorers(
         crnhThresholdNMIntercept=-0.02,
         lowDiligenceThreshold=1000,
         factorThreshold=0.4,
+        multiplyPenaltyByHarassmentScore=False,
+        minimumHarassmentScoreToPenalize=2.5,
+        tagConsensusHarassmentHelpfulRatingPenalty=10,
       )
     )
 
@@ -537,6 +540,7 @@ def _compute_helpfulness_scores(
         c.successfulRatingNeededToEarnIn,
         c.authorTopNotHelpfulTagValues,
         c.isEmergingWriterKey,
+        c.numberOfTimesEarnedOutKey,
       ]
     ],
     on=c.raterParticipantIdKey,
diff --git a/sourcecode/scoring/tag_consensus.py b/sourcecode/scoring/tag_consensus.py
index 21cd2ca6..6c1e4b8a 100644
--- a/sourcecode/scoring/tag_consensus.py
+++ b/sourcecode/scoring/tag_consensus.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 from . import constants as c, process_data
 from .matrix_factorization.matrix_factorization import MatrixFactorization
 
@@ -10,7 +12,7 @@ def train_tag_model(
   helpfulModelNoteParams: pd.DataFrame = None,
   helpfulModelRaterParams: pd.DataFrame = None,
   useSigmoidCrossEntropy: bool = True,
-  name: str = "harassment",
+  name: Optional[str] = None,
 ):
   print(f"-------------------Training for tag {tag}-------------------")
   ratingDataForTag, labelColName = prepare_tag_data(ratings, tag)
@@ -62,6 +64,8 @@ def train_tag_model(
     noteInit=helpfulModelNoteParams,
   )
 
+  if name is None:
+    name = tag.split("elpful")[-1]
   noteParams.columns = [col.replace("internal", name) for col in noteParams.columns]
   raterParams.columns = [col.replace("internal", name) for col in raterParams.columns]
   return noteParams, raterParams, globalBias
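
Reviewer note (not part of the patch): the sketch below mirrors the earn-in requirement that get_contributor_state now applies to earned-out contributors in the hunk above: the base requirement, plus any positive ratingImpact already accumulated, plus one extra base requirement per prior earn-out. The helper name earn_in_requirement and the value used for c.ratingImpactForEarnIn are illustrative assumptions for this sketch only; the real constant lives in constants.py and may differ.

import pandas as pd

# Illustrative stand-in for c.ratingImpactForEarnIn (see constants.py); the real value may differ.
RATING_IMPACT_FOR_EARN_IN = 5


def earn_in_requirement(ratingImpact: float, numberOfTimesEarnedOut: int) -> float:
  # Base requirement, plus any positive rating impact already accumulated,
  # plus one extra base requirement per prior earn-out.
  return (
    RATING_IMPACT_FOR_EARN_IN
    + max(ratingImpact, 0)
    + RATING_IMPACT_FOR_EARN_IN * numberOfTimesEarnedOut
  )


# Example rows: numberOfTimesEarnedOut has already been incremented (as in the patch)
# before the requirement is recomputed.
contributors = pd.DataFrame(
  {"ratingImpact": [3, -1], "numberOfTimesEarnedOut": [2, 1]}
)
contributors["successfulRatingNeededToEarnIn"] = contributors.apply(
  lambda row: earn_in_requirement(row["ratingImpact"], row["numberOfTimesEarnedOut"]),
  axis=1,
)
print(contributors)
# First row: 5 + 3 + 5 * 2 = 18; second row: 5 + 0 + 5 * 1 = 10.

Because the counter is incremented before the requirement is recomputed, each successive earn-out raises the bar by another increment of the base requirement.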