From cf3dd25175597bd25ab3b58fec9a582a96b8e872 Mon Sep 17 00:00:00 2001 From: Brad Miller Date: Thu, 27 Jul 2023 09:34:37 -0700 Subject: [PATCH 1/2] group modling and other assorted updates --- sourcecode/main.py | 25 +- sourcecode/scoring/constants.py | 86 +++++-- sourcecode/scoring/enums.py | 33 +++ sourcecode/scoring/incorrect_filter.py | 4 +- sourcecode/scoring/matrix_factorization.py | 147 ++++++++--- sourcecode/scoring/mf_base_scorer.py | 7 +- sourcecode/scoring/mf_coverage_scorer.py | 76 ------ sourcecode/scoring/mf_group_scorer.py | 285 +++++++++++++++++++++ sourcecode/scoring/note_ratings.py | 21 +- sourcecode/scoring/process_data.py | 42 ++- sourcecode/scoring/run_scoring.py | 251 +++++++++++++++--- sourcecode/scoring/scorer.py | 66 ++++- sourcecode/scoring/scoring_rules.py | 149 +++++++---- 13 files changed, 956 insertions(+), 236 deletions(-) create mode 100644 sourcecode/scoring/enums.py delete mode 100644 sourcecode/scoring/mf_coverage_scorer.py create mode 100644 sourcecode/scoring/mf_group_scorer.py diff --git a/sourcecode/main.py b/sourcecode/main.py index a10a68ba..fcf3bd75 100644 --- a/sourcecode/main.py +++ b/sourcecode/main.py @@ -14,6 +14,7 @@ import os import scoring.constants as c +from scoring.enums import scorers_from_csv from scoring.process_data import get_data, write_tsv_local from scoring.run_scoring import run_scoring @@ -59,12 +60,28 @@ def parse_args(): ) parser.set_defaults(pseudoraters=True) parser.add_argument("-r", "--ratings", default=c.ratingsInputPath, help="rating dataset") + parser.add_argument( + "--scorers", default=None, type=scorers_from_csv, help="CSV list of scorers to enable." + ) parser.add_argument( "--seed", default=None, type=int, help="set to an int to seed matrix factorization" ) parser.add_argument( "-s", "--status", default=c.noteStatusHistoryInputPath, help="note status history dataset" ) + parser.add_argument( + "--strict-columns", + dest="strict_columns", + help="Explicitly select columns and require that expected columns are present.", + action="store_true", + ) + parser.add_argument( + "--nostrict-columns", + help="Disable validation of expected columns and allow unexpected columns.", + action="store_false", + dest="strict_columns", + ) + parser.set_defaults(strict_columns=True) return parser.parse_args() @@ -82,7 +99,13 @@ def main(): # Invoke scoring and user contribution algorithms. scoredNotes, helpfulnessScores, newStatus, auxNoteInfo = run_scoring( - ratings, statusHistory, userEnrollment, seed=args.seed, pseudoraters=args.pseudoraters + ratings, + statusHistory, + userEnrollment, + seed=args.seed, + pseudoraters=args.pseudoraters, + enabledScorers=args.scorers, + strictColumns=args.strict_columns, ) # Write outputs to local disk. diff --git a/sourcecode/scoring/constants.py b/sourcecode/scoring/constants.py index 497c3d44..51f213bc 100644 --- a/sourcecode/scoring/constants.py +++ b/sourcecode/scoring/constants.py @@ -1,6 +1,7 @@ import time import numpy as np +import pandas as pd # Store the timestamp at which the constants module is initialized. Note @@ -15,6 +16,10 @@ maxTrainError = 0.09 +coreFlipPct = 0.15 +expansionFlipPct = 0.19 +maxReruns = 5 + # Explanation Tags minRatingsToGetTag = 2 minTagsNeededForStatus = 2 @@ -37,6 +42,7 @@ summaryKey = "summary" authorTopNotHelpfulTagValues = "authorTopNotHelpfulTagValues" modelingPopulationKey = "modelingPopulation" +modelingGroupKey = "modelingGroup" # TSV Values notHelpfulValueTsv = "NOT_HELPFUL" @@ -123,6 +129,14 @@ def rater_factor_key(i): coverageRatingStatusKey = "coverageRatingStatus" coverageNoteInterceptMaxKey = "coverageNoteInterceptMax" coverageNoteInterceptMinKey = "coverageNoteInterceptMin" +# Group Model +groupNoteInterceptKey = "groupNoteIntercept" +groupNoteFactor1Key = "groupNoteFactor1" +groupRatingStatusKey = "groupRatingStatus" +groupNoteInterceptMaxKey = "groupNoteInterceptMax" +groupNoteInterceptMinKey = "groupNoteInterceptMin" +groupRaterInterceptKey = "groupRaterIntercept" +groupRaterFactor1Key = "groupRaterFactor1" # Ids and Indexes noteIdKey = "noteId" @@ -216,12 +230,10 @@ def rater_factor_key(i): incorrectFilterColumns = [ "notHelpfulIncorrect_interval", - "notHelpfulIncorrect_total_interval", "cnt_interval", "num_voters_interval", "tf_idf_incorrect_interval", "notHelpfulIncorrect_same", - "notHelpfulIncorrect_total_same", "cnt_same", "num_voters_same", "tf_idf_incorrect_same", @@ -261,11 +273,7 @@ def rater_factor_key(i): ] + misleadingTagsAndTypes + notMisleadingTagsAndTypes - + [ - ("trustworthySources", np.int64), - (summaryKey, np.object), - ("isMediaNote", np.int64) - ] + + [("trustworthySources", np.int64), (summaryKey, np.object), ("isMediaNote", np.int64)] ) noteTSVColumns = [col for (col, dtype) in noteTSVColumnsAndTypes] noteTSVTypes = [dtype for (col, dtype) in noteTSVColumnsAndTypes] @@ -366,6 +374,16 @@ def rater_factor_key(i): userEnrollmentTSVTypes = [dtype for (_, dtype) in userEnrollmentTSVColumnsAndTypes] userEnrollmentTSVTypeMapping = {col: dtype for (col, dtype) in userEnrollmentTSVColumnsAndTypes} +# TODO: delete expanded user enrollment definition once modeling group is fully rolled out +userEnrollmentExpandedTSVColumnsAndTypes = userEnrollmentTSVColumnsAndTypes + [ + (modelingGroupKey, np.float64) +] +userEnrollmentExpandedTSVColumns = [col for (col, _) in userEnrollmentExpandedTSVColumnsAndTypes] +userEnrollmentExpandedTSVTypes = [dtype for (_, dtype) in userEnrollmentExpandedTSVColumnsAndTypes] +userEnrollmentExpandedTSVTypeMapping = { + col: dtype for (col, dtype) in userEnrollmentExpandedTSVColumnsAndTypes +} + noteInterceptMaxKey = "internalNoteIntercept_max" noteInterceptMinKey = "internalNoteIntercept_min" noteParameterUncertaintyTSVMainColumnsAndTypes = [ @@ -423,6 +441,16 @@ def rater_factor_key(i): + incorrectFilterColumns ) +deprecatedNoteModelOutputColumns = frozenset( + { + coverageNoteInterceptKey, + coverageNoteFactor1Key, + coverageRatingStatusKey, + coverageNoteInterceptMinKey, + coverageNoteInterceptMaxKey, + } +) + noteModelOutputTSVColumnsAndTypes = [ (noteIdKey, np.int64), (coreNoteInterceptKey, np.double), @@ -453,9 +481,20 @@ def rater_factor_key(i): (expansionNoteInterceptMaxKey, np.double), (coverageNoteInterceptMinKey, np.double), (coverageNoteInterceptMaxKey, np.double), + (groupNoteInterceptKey, np.double), + (groupNoteFactor1Key, np.double), + (groupRatingStatusKey, np.str), + (groupNoteInterceptMaxKey, np.double), + (groupNoteInterceptMinKey, np.double), + (modelingGroupKey, np.float64), ] noteModelOutputTSVColumns = [col for (col, dtype) in noteModelOutputTSVColumnsAndTypes] noteModelOutputTSVTypeMapping = {col: dtype for (col, dtype) in noteModelOutputTSVColumnsAndTypes} +deprecatedNoteModelOutputTSVColumnsAndTypes = [ + (col, dtype) + for (col, dtype) in noteModelOutputTSVColumnsAndTypes + if col in deprecatedNoteModelOutputColumns +] raterModelOutputTSVColumnsAndTypes = [ (raterParticipantIdKey, np.int64), @@ -464,25 +503,28 @@ def rater_factor_key(i): (crhCrnhRatioDifferenceKey, np.double), (meanNoteScoreKey, np.double), (raterAgreeRatioKey, np.double), - (successfulRatingHelpfulCount, np.int64), - (successfulRatingNotHelpfulCount, np.int64), - (successfulRatingTotal, np.int64), - (unsuccessfulRatingHelpfulCount, np.int64), - (unsuccessfulRatingNotHelpfulCount, np.int64), - (unsuccessfulRatingTotal, np.int64), - (ratingsAwaitingMoreRatings, np.int64), - (ratedAfterDecision, np.int64), - (notesCurrentlyRatedHelpful, np.int64), - (notesCurrentlyRatedNotHelpful, np.int64), - (notesAwaitingMoreRatings, np.int64), + (successfulRatingHelpfulCount, pd.Int64Dtype()), + (successfulRatingNotHelpfulCount, pd.Int64Dtype()), + (successfulRatingTotal, pd.Int64Dtype()), + (unsuccessfulRatingHelpfulCount, pd.Int64Dtype()), + (unsuccessfulRatingNotHelpfulCount, pd.Int64Dtype()), + (unsuccessfulRatingTotal, pd.Int64Dtype()), + (ratingsAwaitingMoreRatings, pd.Int64Dtype()), + (ratedAfterDecision, pd.Int64Dtype()), + (notesCurrentlyRatedHelpful, pd.Int64Dtype()), + (notesCurrentlyRatedNotHelpful, pd.Int64Dtype()), + (notesAwaitingMoreRatings, pd.Int64Dtype()), (enrollmentState, np.int32), - (successfulRatingNeededToEarnIn, np.int64), + (successfulRatingNeededToEarnIn, pd.Int64Dtype()), (authorTopNotHelpfulTagValues, np.str), - (timestampOfLastStateChange, np.int64), - (aboveHelpfulnessThresholdKey, np.bool_), + (timestampOfLastStateChange, np.double), + (aboveHelpfulnessThresholdKey, np.float64), # nullable bool (isEmergingWriterKey, np.bool_), - (aggregateRatingReceivedTotal, np.int64), + (aggregateRatingReceivedTotal, pd.Int64Dtype()), (timestampOfLastEarnOut, np.double), + (groupRaterInterceptKey, np.double), + (groupRaterFactor1Key, np.double), + (modelingGroupKey, np.float64), ] raterModelOutputTSVColumns = [col for (col, dtype) in raterModelOutputTSVColumnsAndTypes] raterModelOutputTSVTypeMapping = {col: dtype for (col, dtype) in raterModelOutputTSVColumnsAndTypes} diff --git a/sourcecode/scoring/enums.py b/sourcecode/scoring/enums.py new file mode 100644 index 00000000..2e92df1f --- /dev/null +++ b/sourcecode/scoring/enums.py @@ -0,0 +1,33 @@ +from enum import auto, Enum +from typing import Set + + +class Scorers(Enum): + """Exhaustive list of all scorers to simplify setting enabled/disabled scorers.""" + + MFCoreScorer = auto() + MFExpansionScorer = auto() + # Note that the MFGroupScorer value controls whether *all* group scorers are instantiated, + # not just a single MFGroupScorer instance. + MFGroupScorer = auto() + + +def scorers_from_csv(csv: str) -> Set[Scorers]: + """Converts a CSV of enums to an actual set of Enum values. + + Args: + csv: CSV string of Scorer names. + + Returns: + Set containing Scorers. + + Raises: + ValueError if csv contains a token which is not a valid Scorer. + """ + values = [] + for value in csv.split(","): + try: + values.append(Scorers[value]) + except KeyError: + raise ValueError(f"Unknown value {value}") + return set(values) diff --git a/sourcecode/scoring/incorrect_filter.py b/sourcecode/scoring/incorrect_filter.py index 3706087b..f8e3e6c2 100644 --- a/sourcecode/scoring/incorrect_filter.py +++ b/sourcecode/scoring/incorrect_filter.py @@ -53,6 +53,8 @@ def _get_incorrect_tfidf_ratio( suffix: suffix for incorrect and count column names for this filter Returns: + pd.DataFrame with one row for each note, with computed sum(tf_idf_incorrect) score for raters + included in filter """ ratings_w_user_totals = augmented_ratings[user_filter] @@ -67,7 +69,7 @@ def _get_incorrect_tfidf_ratio( ) / np.log( 1 + (rating_aggs_w_cnt["notHelpfulIncorrect_total"] / rating_aggs_w_cnt["cnt"]) ) # p(incorrect over all rater ratings) - rating_aggs_w_cnt.drop("notHelpfulIncorrect_total", axis=1) + rating_aggs_w_cnt.drop("notHelpfulIncorrect_total", inplace=True, axis=1) rating_aggs_w_cnt.columns = [c.noteIdKey] + [ f"{col}{suffix}" for col in rating_aggs_w_cnt.columns[1:] ] diff --git a/sourcecode/scoring/matrix_factorization.py b/sourcecode/scoring/matrix_factorization.py index 5ce33899..5bd1dac6 100644 --- a/sourcecode/scoring/matrix_factorization.py +++ b/sourcecode/scoring/matrix_factorization.py @@ -251,6 +251,72 @@ def _create_mf_model( return mf_model, optimizer, criterion + def _get_train_validate_sets( + self, + row: torch.LongTensor, + col: torch.LongTensor, + rating: torch.FloatTensor, + validate_percent: Optional[float] = None, + ) -> Tuple[ + torch.LongTensor, + torch.LongTensor, + torch.FloatTensor, + torch.LongTensor, + torch.LongTensor, + torch.FloatTensor, + ]: + if validate_percent is not None: + random_indices = np.random.permutation(np.arange(len(row))) + validate_indices = random_indices[: int(validate_percent * len(row))] + train_indices = random_indices[int(validate_percent * len(row)) :] + row_validate = row[validate_indices] + col_validate = col[validate_indices] + rating_validate = rating[validate_indices] + row_train = row[train_indices] + col_train = col[train_indices] + rating_train = rating[train_indices] + else: + row_train = row + col_train = col + rating_train = rating + row_validate = None + col_validate = None + rating_validate = None + return row_train, col_train, rating_train, row_validate, col_validate, rating_validate + + def _compute_and_print_loss( + self, + mf_model: BiasedMatrixFactorization, + loss_value: float, + criterion: torch.nn.modules.loss._Loss, + row_train: torch.LongTensor, + col_train: torch.LongTensor, + rating_train: torch.FloatTensor, + row_validate: Optional[torch.LongTensor], + col_validate: Optional[torch.LongTensor], + rating_validate: Optional[torch.FloatTensor], + epoch: int, + final: bool = False, + ) -> Tuple[float, float, Optional[float]]: + y_pred = mf_model(row_train, col_train) + train_loss_value = criterion(y_pred, rating_train).item() + if row_validate is not None: + y_pred_validate = mf_model(row_validate, col_validate) + validate_loss_value = criterion(y_pred_validate, rating_validate).item() + else: + validate_loss_value = None + + if self._logging: + print("epoch", epoch, loss_value) + print("TRAIN FIT LOSS: ", train_loss_value) + if validate_loss_value is not None: + print("VALIDATE FIT LOSS: ", validate_loss_value) + + if final == True: + self.test_errors.append(loss_value) + self.train_errors.append(train_loss_value) + return train_loss_value, loss_value, validate_loss_value + def _fit_model( self, mf_model: BiasedMatrixFactorization, @@ -259,7 +325,8 @@ def _fit_model( row: torch.LongTensor, col: torch.LongTensor, rating: torch.FloatTensor, - ) -> None: + validate_percent: Optional[float] = None, + ) -> Tuple[float, float, Optional[float]]: """Run gradient descent to train the model. Args: @@ -270,23 +337,20 @@ def _fit_model( col (torch.LongTensor) rating (torch.FloatTensor) """ - l2_lambda_intercept = self._l2_lambda * self._l2_intercept_multiplier - - def print_loss(final=False): - y_pred = mf_model(row, col) - train_loss = criterion(y_pred, rating) - - if self._logging: - print("epoch", epoch, loss.item()) - print("TRAIN FIT LOSS: ", train_loss.item()) - if final == True: - self.test_errors.append(loss.item()) - self.train_errors.append(train_loss.item()) + ( + row_train, + col_train, + rating_train, + row_validate, + col_validate, + rating_validate, + ) = self._get_train_validate_sets(row, col, rating, validate_percent) + l2_lambda_intercept = self._l2_lambda * self._l2_intercept_multiplier prev_loss = 1e10 - y_pred = mf_model(row, col) - loss = criterion(y_pred, rating) + y_pred = mf_model(row_train, col_train) + loss = criterion(y_pred, rating_train) l2_reg_loss = torch.tensor(0.0).to(mf_model.device) for name, param in mf_model.named_parameters(): @@ -299,7 +363,9 @@ def print_loss(final=False): epoch = 0 - while abs(prev_loss - loss.item()) > self._convergence: + while (abs(loss.item() - prev_loss) > self._convergence) and ( + not (epoch > 100 and loss.item() > prev_loss) + ): prev_loss = loss.item() @@ -313,8 +379,8 @@ def print_loss(final=False): optimizer.zero_grad() # Predict and calculate loss - y_pred = mf_model(row, col) - loss = criterion(y_pred, rating) + y_pred = mf_model(row_train, col_train) + loss = criterion(y_pred, rating_train) l2_reg_loss = torch.tensor(0.0).to(mf_model.device) for name, param in mf_model.named_parameters(): @@ -326,13 +392,37 @@ def print_loss(final=False): loss += l2_reg_loss if epoch % 50 == 0: - print_loss() + self._compute_and_print_loss( + mf_model, + loss.item(), + criterion, + row_train, + col_train, + rating_train, + row_validate, + col_validate, + rating_validate, + epoch, + final=False, + ) epoch += 1 if self._logging: print("Num epochs:", epoch) - print_loss(final=True) + return self._compute_and_print_loss( + mf_model, + loss.item(), + criterion, + row_train, + col_train, + rating_train, + row_validate, + col_validate, + rating_validate, + epoch, + final=True, + ) def run_mf( self, @@ -341,7 +431,8 @@ def run_mf( userInit: pd.DataFrame = None, globalInterceptInit: Optional[float] = None, specificNoteId: Optional[int] = None, - ) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[float]]: + validatePercent: Optional[float] = None, + ): """Train matrix factorization model. See https://twitter.github.io/communitynotes/ranking-notes/#matrix-factorization @@ -387,13 +478,8 @@ def run_mf( row = torch.LongTensor(noteRatingIds[_raterIndexKey].values).to(mf_model.device) col = torch.LongTensor(noteRatingIds[_noteIndexKey].values).to(mf_model.device) - self._fit_model( - mf_model, - optimizer, - criterion, - row, - col, - rating, + train_loss, loss, validate_loss = self._fit_model( + mf_model, optimizer, criterion, row, col, rating, validatePercent ) assert mf_model.item_factors.weight.data.cpu().numpy().shape[0] == noteIdMap.shape[0] @@ -409,7 +495,10 @@ def run_mf( ) fitRaterParams.drop(_raterIndexKey, axis=1, inplace=True) - return fitNoteParams, fitRaterParams, globalIntercept + if validatePercent is None: + return fitNoteParams, fitRaterParams, globalIntercept + else: + return fitNoteParams, fitRaterParams, globalIntercept, train_loss, loss, validate_loss def _flip_factors_for_identification( self, noteIdMap: pd.DataFrame, raterIdMap: pd.DataFrame diff --git a/sourcecode/scoring/mf_base_scorer.py b/sourcecode/scoring/mf_base_scorer.py index a005f7dc..d301ddab 100644 --- a/sourcecode/scoring/mf_base_scorer.py +++ b/sourcecode/scoring/mf_base_scorer.py @@ -30,7 +30,7 @@ def __init__( crnhThresholdNoteFactorMultiplier: float = -0.8, crnhThresholdNMIntercept: float = -0.15, crnhThresholdUCBIntercept: float = -0.04, - crhThresholdLCBIntercept: float = 0.31, + crhThresholdLCBIntercept: float = 0.32, crhSuperThreshold: float = 0.5, inertiaDelta: float = 0.01, weightedTotalVotes: float = 1.0, @@ -87,6 +87,10 @@ def __init__( self._weightedTotalVotes = weightedTotalVotes self._mfRanker = matrix_factorization.MatrixFactorization() + def get_crh_threshold(self) -> float: + """Return CRH threshold for general scoring logic.""" + return self._crhThreshold + def get_scored_notes_cols(self) -> List[str]: """Returns a list of columns which should be present in the scoredNotes output.""" return [ @@ -160,6 +164,7 @@ def _score_notes_and_users( userScores pd.DataFrame: one row per user containing a column for each helpfulness score. """ if self._seed is not None: + print(f"seeding with {self._seed}") torch.manual_seed(self._seed) # Removes ratings where either (1) the note did not receive enough ratings, or diff --git a/sourcecode/scoring/mf_coverage_scorer.py b/sourcecode/scoring/mf_coverage_scorer.py deleted file mode 100644 index 8287bc78..00000000 --- a/sourcecode/scoring/mf_coverage_scorer.py +++ /dev/null @@ -1,76 +0,0 @@ -from typing import Dict, List, Optional - -from . import constants as c, matrix_factorization -from .mf_core_scorer import MFCoreScorer - - -class MFCoverageScorer(MFCoreScorer): - def __init__(self, seed: Optional[int] = None) -> None: - """Configure MFCoverageScorer object. - - Args: - seed: if not None, seed value to ensure deterministic execution - """ - super().__init__(seed) - self._crhThreshold = 0.338 - self._pseudoraters = False - self._mfRanker = matrix_factorization.MatrixFactorization(l2_intercept_multiplier=7) - - def _get_note_col_mapping(self) -> Dict[str, str]: - """Returns a dict mapping default note column names to custom names for a specific model.""" - return { - c.internalNoteInterceptKey: c.coverageNoteInterceptKey, - c.internalNoteFactor1Key: c.coverageNoteFactor1Key, - c.internalRatingStatusKey: c.coverageRatingStatusKey, - c.noteInterceptMinKey: c.coverageNoteInterceptMinKey, - c.noteInterceptMaxKey: c.coverageNoteInterceptMaxKey, - } - - def _get_user_col_mapping(self) -> Dict[str, str]: - """Returns a dict mapping default user column names to custom names for a specific model.""" - return {} - - def get_scored_notes_cols(self) -> List[str]: - """Returns a list of columns which should be present in the scoredNotes output.""" - return [ - c.noteIdKey, - c.coverageNoteInterceptKey, - c.coverageNoteFactor1Key, - c.coverageRatingStatusKey, - c.coverageNoteInterceptMinKey, - c.coverageNoteInterceptMaxKey, - ] - - def get_helpfulness_scores_cols(self) -> List[str]: - """Returns a list of columns which should be present in the helpfulnessScores output.""" - return [] - - def get_auxiliary_note_info_cols(self) -> List[str]: - """Returns a list of columns which should be present in the auxiliaryNoteInfo output.""" - return [] - - def _get_dropped_note_cols(self) -> List[str]: - """Returns a list of columns which should be excluded from scoredNotes and auxiliaryNoteInfo.""" - return super()._get_dropped_note_cols() + ( - [ - c.internalActiveRulesKey, - c.activeFilterTagsKey, - c.ratingWeightKey, - ] - + c.notHelpfulTagsAdjustedColumns - + c.notHelpfulTagsAdjustedRatioColumns - + c.incorrectFilterColumns - + c.noteParameterUncertaintyTSVAuxColumns - ) - - def _get_dropped_user_cols(self) -> List[str]: - """Returns a list of columns which should be excluded from helpfulnessScores output.""" - return super()._get_dropped_user_cols() + [ - c.raterParticipantIdKey, - c.internalRaterInterceptKey, - c.internalRaterFactor1Key, - c.crhCrnhRatioDifferenceKey, - c.meanNoteScoreKey, - c.raterAgreeRatioKey, - c.aboveHelpfulnessThresholdKey, - ] diff --git a/sourcecode/scoring/mf_group_scorer.py b/sourcecode/scoring/mf_group_scorer.py new file mode 100644 index 00000000..756c2047 --- /dev/null +++ b/sourcecode/scoring/mf_group_scorer.py @@ -0,0 +1,285 @@ +from typing import Dict, List, Optional, Tuple + +from . import constants as c +from .mf_base_scorer import MFBaseScorer + +import numpy as np +import pandas as pd + + +# Number of MFGroupScorer objects we expect to instantiate +groupScorerCount = 12 + + +def _coalesce_columns(df: pd.DataFrame, columnPrefix: str) -> pd.DataFrame: + """Condense all columns beginning with columnPrefix into a single column. + + With each row there must be at most one column with a non-NaN value in the set of + columns beginning with columnPrefix. If a non-NaN value is present that will + become the value in the condensed column, otherwise the value will be NaN. After + column values are condensed the original (prefixed) columns will be dropped. + + Args: + df: DataFrame containing columns to condense + collumnPrefix: Prefix used to detect columns to coalesce, and the name for + the output column. + + Returns: + DataFrame with all columns prefixed by columnPrefix dropped and replaced by + a single column named columnPrefix + + Raises: + AssertionError if multiple columns prefixed by columnPrefix have non-NaN values + for any row. + """ + # Identify columns to coalesce + columns = [col for col in df.columns if col.startswith(f"{columnPrefix}_")] + if not columns: + return df + # Validate that at most one column is set, and store which rows have a column set + rowResults = np.invert(df[columns].isna()).sum(axis=1) + assert all(rowResults <= 1), "each row should only be in one modeling group" + # Coalesce results + def _get_value(row): + idx = row.first_valid_index() + return row[idx] if idx is not None else np.nan + + coalesced = df[columns].apply(_get_value, axis=1) + # Drop old columns and replace with new + df = df.drop(columns=columns) + df[columnPrefix] = coalesced + return df + + +def coalesce_group_models( + scoredNotes: pd.DataFrame, helpfulnessScores: pd.DataFrame +) -> Tuple[pd.DataFrame, pd.DataFrame]: + """Coalesce all group modeling columns across note and user scoring. + + Since each Scorer must have distinct output columns, we use coalescing to run + multiple instances of MFGroupScorer objects and then condense the results into + a single set of columns. This approach works because each note will be scored + by at most one MFGroupScorer instance. + + Args: + scoredNotes: scoring output for notes. + helpfulnessScores: scoring output for users. + + Returns: + tuple containing coalesced scoring results for notes and users. + """ + for col in [ + c.groupNoteInterceptKey, + c.groupNoteFactor1Key, + c.groupRatingStatusKey, + c.groupNoteInterceptMaxKey, + c.groupNoteInterceptMinKey, + c.modelingGroupKey, + ]: + scoredNotes = _coalesce_columns(scoredNotes, col) + + for col in [c.groupRaterInterceptKey, c.groupRaterFactor1Key, c.modelingGroupKey]: + helpfulnessScores = _coalesce_columns(helpfulnessScores, col) + + return scoredNotes, helpfulnessScores + + +class MFGroupScorer(MFBaseScorer): + def __init__( + self, + groupNumber: int, + seed: Optional[int] = None, + pseudoraters: Optional[bool] = False, + groupThreshold: float = 0.8, + ) -> None: + """Configure MFGroupScorer object. + + Notice that each MFGroupScorer defines column names by appending the groupNumber to + column prefixes which are constant. Dynamically defining the column names allows the + group scorer to be instantiated multiple times while maintaining the property that + the columns attached by each scorer remain unique. Once all scorers have ran, we + (will) validate that each note was scored by at most one group scorer and then coalesce + all of the group scoring columns and remove the groupNumber suffix. + + Args: + groupNumber: int indicating which group this scorer instance should filter for. + seed: if not None, seed value to ensure deterministic execution + pseudoraters: if True, compute optional pseudorater confidence intervals + groupThreshold: float indicating what fraction of ratings must be from within a group + for the model to be active + """ + super().__init__(seed, pseudoraters) + assert groupNumber > 0, "groupNumber must be positive. 0 is reserved for unassigned." + assert groupNumber <= groupScorerCount, "groupNumber exceeds maximum expected groups." + self._groupNumber = groupNumber + self._groupThreshold = groupThreshold + self._groupNoteInterceptKey = f"{c.groupNoteInterceptKey}_{self._groupNumber}" + self._groupNoteFactor1Key = f"{c.groupNoteFactor1Key}_{self._groupNumber}" + self._groupRatingStatusKey = f"{c.groupRatingStatusKey}_{self._groupNumber}" + self._groupNoteInterceptMaxKey = f"{c.groupNoteInterceptMaxKey}_{self._groupNumber}" + self._groupNoteInterceptMinKey = f"{c.groupNoteInterceptMinKey}_{self._groupNumber}" + self._groupRaterInterceptKey = f"{c.groupRaterInterceptKey}_{self._groupNumber}" + self._groupRaterFactor1Key = f"{c.groupRaterFactor1Key}_{self._groupNumber}" + self._modelingGroupKey = f"{c.modelingGroupKey}_{self._groupNumber}" + + def _get_note_col_mapping(self) -> Dict[str, str]: + """Returns a dict mapping default note column names to custom names for a specific model.""" + return { + c.internalNoteInterceptKey: self._groupNoteInterceptKey, + c.internalNoteFactor1Key: self._groupNoteFactor1Key, + c.internalRatingStatusKey: self._groupRatingStatusKey, + c.noteInterceptMinKey: self._groupNoteInterceptMinKey, + c.noteInterceptMaxKey: self._groupNoteInterceptMaxKey, + } + + def _get_user_col_mapping(self) -> Dict[str, str]: + """Returns a dict mapping default user column names to custom names for a specific model.""" + return { + c.internalRaterInterceptKey: self._groupRaterInterceptKey, + c.internalRaterFactor1Key: self._groupRaterFactor1Key, + } + + def get_scored_notes_cols(self) -> List[str]: + """Returns a list of columns which should be present in the scoredNotes output.""" + return [ + c.noteIdKey, + self._groupNoteInterceptKey, + self._groupNoteFactor1Key, + self._groupRatingStatusKey, + self._groupNoteInterceptMaxKey, + self._groupNoteInterceptMinKey, + self._modelingGroupKey, + ] + + def get_helpfulness_scores_cols(self) -> List[str]: + """Returns a list of columns which should be present in the helpfulnessScores output.""" + return [ + c.raterParticipantIdKey, + self._groupRaterInterceptKey, + self._groupRaterFactor1Key, + self._modelingGroupKey, + ] + + def get_auxiliary_note_info_cols(self) -> List[str]: + """Returns a list of columns which should be present in the auxiliaryNoteInfo output.""" + return [] + + def _get_dropped_note_cols(self) -> List[str]: + """Returns a list of columns which should be excluded from scoredNotes and auxiliaryNoteInfo.""" + return super()._get_dropped_note_cols() + ( + [ + c.internalActiveRulesKey, + c.activeFilterTagsKey, + c.ratingWeightKey, + ] + + c.notHelpfulTagsAdjustedColumns + + c.notHelpfulTagsAdjustedRatioColumns + + c.incorrectFilterColumns + + c.noteParameterUncertaintyTSVAuxColumns + ) + + def _get_dropped_user_cols(self) -> List[str]: + """Returns a list of columns which should be excluded from helpfulnessScores output.""" + return super()._get_dropped_user_cols() + [ + c.crhCrnhRatioDifferenceKey, + c.meanNoteScoreKey, + c.raterAgreeRatioKey, + c.aboveHelpfulnessThresholdKey, + ] + + def _filter_input( + self, ratings: pd.DataFrame, noteStatusHistory: pd.DataFrame, userEnrollment: pd.DataFrame + ) -> Tuple[pd.DataFrame, pd.DataFrame]: + """Prune the contents of ratings to only include ratings from users in the modeling group. + + This function identifies the subset of ratings to include in group model scoring. + To improve modeling within the group, we only include ratings from users in the modeling + group. However, we place no restriction on which notes to include in the model and instead + include ratings on any note. Including ratings on any note increases the amount of data + available during training about each user, in effect also increasing the number of users + and notes we are able to include in the model. + + Including notes by users outside of the modeling group means that the model will issue + scores for notes which do not meet group modeling criteria (i.e. >80% of ratings are + from users in the modeling group, and the author is also from the modeling group). We + enforce these criteria *after* scoring in _postprocess_output so that the maximum amount + of ratings are available during scoring. + + Args: + ratings (pd.DataFrame): preprocessed ratings + noteStatusHistory (pd.DataFrame): one row per note; history of when note had each status + userEnrollment (pd.DataFrame): one row per user specifying enrollment properties + + Returns: + Tuple[pd.DataFrame, pd.DataFrame]: + ratings: ratings filtered to only contain rows of interest + noteStatusHistory: noteStatusHistory filtered to only contain rows of interest + """ + userEnrollment = userEnrollment.rename(columns={c.participantIdKey: c.raterParticipantIdKey}) + userEnrollment = userEnrollment[userEnrollment[c.modelingGroupKey] == self._groupNumber] + ratings = ratings.merge(userEnrollment[[c.raterParticipantIdKey]].drop_duplicates()) + return ratings, noteStatusHistory + + def _postprocess_output( + self, + noteScores: pd.DataFrame, + userScores: pd.DataFrame, + ratings: pd.DataFrame, + noteStatusHistory: pd.DataFrame, + userEnrollment: pd.DataFrame, + ) -> Tuple[pd.DataFrame, pd.DataFrame]: + """Filter noteScores and userScores and add column containing modeling group. + + Enforce the requirements that: + - Notes scored by this group model have >=80% of ratings from the modeling group. + - Notes scored by this group model were authored by a member of the modeling group. + - Users assigned a factor / intercept are in the modeling group. + + Args: + noteScores: note outputs from scoring + userScores: user outputs from scoring + ratings (pd.DataFrame): preprocessed ratings + noteStatusHistory (pd.DataFrame): one row per note; history of when note had each status + userEnrollment (pd.DataFrame): one row per user specifying enrollment properties + + Returns: + Tuple[pd.DataFrame, pd.DataFrame]: + noteScores: filtered and updated note scoring output + userScores: filtered and updated user scoring output + """ + # Prune notes according to authorship filter. + noteScores = noteScores.merge( + userEnrollment[[c.participantIdKey, c.modelingGroupKey]].rename( + columns={c.participantIdKey: c.noteAuthorParticipantIdKey} + ), + how="left", + ) + noteScores = noteScores[noteScores[c.modelingGroupKey] == self._groupNumber] + noteScores = noteScores.drop(columns=c.modelingGroupKey) + # Identify notes with enough ratings from within the modeling group. + ratings = ratings.merge( + userEnrollment[[c.participantIdKey, c.modelingGroupKey]].rename( + columns={c.participantIdKey: c.raterParticipantIdKey} + ), + how="left", + ) + ratings["inGroup"] = ratings[c.modelingGroupKey] == self._groupNumber + ratios = ratings[[c.noteIdKey, "inGroup"]].groupby(c.noteIdKey).mean().reset_index() + notesAboveThreshold = ratios[ratios["inGroup"] >= self._groupThreshold][[c.noteIdKey]] + noteScores = noteScores.merge(notesAboveThreshold) + # Note that even though ratings were restricted to the modeling group, users outside of + # the modeling group may still have authored a note which was rated and may consequently + # appear in the userScores. Accordingly, we drop any user which was outside of the + # modeling group from the user scores. + userScores = userScores.merge( + userEnrollment[[c.participantIdKey, c.modelingGroupKey]].rename( + columns={c.participantIdKey: c.raterParticipantIdKey} + ), + how="left", + ) + userScores = userScores[userScores[c.modelingGroupKey] == self._groupNumber] + userScores = userScores.drop(columns=c.modelingGroupKey) + # Set the modelingGroupKey column in each output + noteScores[self._modelingGroupKey] = self._groupNumber + userScores[self._modelingGroupKey] = self._groupNumber + return noteScores, userScores diff --git a/sourcecode/scoring/note_ratings.py b/sourcecode/scoring/note_ratings.py index dad9dcfc..df12a23f 100644 --- a/sourcecode/scoring/note_ratings.py +++ b/sourcecode/scoring/note_ratings.py @@ -21,10 +21,18 @@ def is_crh(scoredNotes, minRatingsNeeded, crhThreshold) -> pd.Series: ) -def is_crh_lcb(scoredNotes, minRatingsNeeded, crhThresholdLCBIntercept) -> pd.Series: +def is_crh_lcb(scoredNotes, minRatingsNeeded, crhThresholdLCBIntercept, inertia=False) -> pd.Series: enoughRatings = scoredNotes[c.numRatingsKey] >= minRatingsNeeded + if c.noteInterceptMinKey in scoredNotes.columns: - return enoughRatings & (scoredNotes[c.noteInterceptMinKey] >= crhThresholdLCBIntercept) + if inertia == False: + return enoughRatings & (scoredNotes[c.noteInterceptMinKey] >= crhThresholdLCBIntercept) + else: + return ( + enoughRatings + & (scoredNotes[c.noteInterceptMinKey] >= crhThresholdLCBIntercept) + & (scoredNotes[c.currentLabelKey] == c.currentlyRatedHelpful) + ) else: # all False return enoughRatings & (~enoughRatings) @@ -489,6 +497,15 @@ def compute_scored_notes( crhThreshold, minRatingsNeeded, ), + scoring_rules.RuleFromFunction( + RuleID.LCB_INERTIA, + {RuleID.GENERAL_CRH}, + c.currentlyRatedHelpful, + lambda noteStats: is_crh_lcb_function( + noteStats, minRatingsNeeded, crhThresholdLCBIntercept - inertiaDelta, True + ), + onlyApplyToNotesThatSayTweetIsMisleading=True, + ), scoring_rules.FilterTagOutliers( RuleID.TAG_OUTLIER, {RuleID.GENERAL_CRH}, diff --git a/sourcecode/scoring/process_data.py b/sourcecode/scoring/process_data.py index bc74a22a..d62cd9c6 100644 --- a/sourcecode/scoring/process_data.py +++ b/sourcecode/scoring/process_data.py @@ -87,7 +87,7 @@ def tsv_parser( firstLine = rawTSV.split("\n")[0] if len(firstLine.split("\t")) != len(columns): raise ValueError - return pd.read_csv( + data = pd.read_csv( StringIO(rawTSV), sep="\t", names=columns, @@ -95,13 +95,39 @@ def tsv_parser( header=0 if header else None, index_col=[], ) + return data except (ValueError, IndexError): raise ValueError("invalid input") -def tsv_reader(path: str, mapping, columns, header=False): +# TODO: remove this function once modelingGroup column is fully launched +def user_enrollment_parser(rawTSV: str, header: bool) -> pd.DataFrame: + """Parse user enrollment TSV and optinoally tolerate the modelingGroup column. + + Args: + rawTSV: str contianing entire TSV input + header: bool indicating whether the input will have a header + + Returns: + pd.DataFrame containing parsed data + """ + try: + df = tsv_parser(rawTSV, c.userEnrollmentTSVTypeMapping, c.userEnrollmentTSVColumns, header) + df[c.modelingGroupKey] = 0 + except ValueError: + df = tsv_parser( + rawTSV, c.userEnrollmentExpandedTSVTypeMapping, c.userEnrollmentExpandedTSVColumns, header + ) + return df + + +# TODO: remove support for specifying a custom parser once modelingGroup is fully rolled out +def tsv_reader(path: str, mapping, columns, header=False, parser=tsv_parser): with open(path, "r") as handle: - return tsv_parser(handle.read(), mapping, columns, header) + if parser == tsv_parser: + return parser(handle.read(), mapping, columns, header) + else: + return parser(handle.read(), header) def read_from_tsv( @@ -162,13 +188,13 @@ def read_from_tsv( userEnrollment = None else: userEnrollment = tsv_reader( - userEnrollmentPath, c.userEnrollmentTSVTypeMapping, c.userEnrollmentTSVColumns, header=headers + userEnrollmentPath, None, None, header=headers, parser=user_enrollment_parser ) - assert len(userEnrollment.columns.values) == len(c.userEnrollmentTSVColumns) and all( - userEnrollment.columns == c.userEnrollmentTSVColumns + assert len(userEnrollment.columns.values) <= len(c.userEnrollmentExpandedTSVColumns) and ( + len(set(userEnrollment.columns) - set(c.userEnrollmentExpandedTSVColumns)) == 0 ), ( - f"userEnrollment columns don't match: \n{[col for col in userEnrollment.columns if not col in c.userEnrollmentTSVColumns]} are extra columns, " - + f"\n{[col for col in c.userEnrollmentTSVColumns if not col in userEnrollment.columns]} are missing." + f"userEnrollment columns don't match: \n{[col for col in userEnrollment.columns if not col in c.userEnrollmentExpandedTSVColumns]} are extra columns, " + + f"\n{[col for col in c.userEnrollmentExpandedTSVColumns if not col in userEnrollment.columns]} are missing." ) return notes, ratings, noteStatusHistory, userEnrollment diff --git a/sourcecode/scoring/run_scoring.py b/sourcecode/scoring/run_scoring.py index b6e02faa..86a5d7f4 100644 --- a/sourcecode/scoring/run_scoring.py +++ b/sourcecode/scoring/run_scoring.py @@ -7,15 +7,17 @@ from collections import namedtuple import concurrent.futures +from itertools import chain import multiprocessing import time -from typing import List, Optional, Tuple +from typing import Dict, List, Optional, Set, Tuple from . import constants as c, contributor_state, note_ratings, note_status_history, scoring_rules +from .enums import Scorers from .mf_base_scorer import MFBaseScorer from .mf_core_scorer import MFCoreScorer -from .mf_coverage_scorer import MFCoverageScorer from .mf_expansion_scorer import MFExpansionScorer +from .mf_group_scorer import coalesce_group_models, groupScorerCount, MFGroupScorer from .scorer import Scorer from .scoring_rules import RuleID @@ -26,17 +28,88 @@ ModelResult = namedtuple("ModelResult", ["scoredNotes", "helpfulnessScores", "auxiliaryNoteInfo"]) -def _get_scorers(seed: Optional[int], pseudoraters: Optional[bool]) -> List[Scorer]: +def _check_flips( + scoredNotes: pd.DataFrame, + noteStatusHistory: pd.DataFrame, + userEnrollment: pd.DataFrame, + modelingPopulations: List[str], + pctFlips: float, + resultCol: str, +) -> bool: + """ + Args: + scoredNotes (pd.DataFrame): notes with scores returned by MF scoring algorithm to evaluate against NSH for flips + noteStatusHistory (pd.DataFrame): one row per note; history of when note had each status + userEnrollment (pd.DataFrame): The enrollment state for each contributor + modelingPopulation (str): name of modelingPopulation(s) of authors for which to evaluate CRH flips + pctFlips (float): percent of flips of unlocked notes that triggers a rerun + resultCol (str): name of column with noteStatus for model + + Returns: + bool whether unlocked note flips exceed acceptable level, triggering a rerun + """ + # combine noteStatusHistory with userEnrollment modelingPopulation + popNsh = noteStatusHistory.merge( + userEnrollment[[c.participantIdKey, c.modelingPopulationKey]], + left_on=c.noteAuthorParticipantIdKey, + right_on=c.participantIdKey, + how="inner", + ) + unlockedPopNsh = popNsh.loc[ + (popNsh[c.modelingPopulationKey].isin(modelingPopulations)) + & (popNsh[c.timestampMillisOfStatusLockKey].isna()) + ] + + # check how many unlocked notes that appeared in previous run have now flipped status + unlockedPopNsh = unlockedPopNsh.merge( + scoredNotes[[c.noteIdKey, resultCol]], on=c.noteIdKey, how="inner" + ) + + statusFlips = len( + unlockedPopNsh.loc[ + (unlockedPopNsh[c.currentLabelKey] != unlockedPopNsh[resultCol]) + & ( + (unlockedPopNsh[c.currentLabelKey] == c.currentlyRatedHelpful) + | (unlockedPopNsh[resultCol] == c.currentlyRatedHelpful) + ) + ] + ) + # take max w/1 to avoid divide by zero error when no prevCRH + prevCrh = max( + 1, len(unlockedPopNsh.loc[unlockedPopNsh[c.currentLabelKey] == c.currentlyRatedHelpful]) + ) + print(f"Percentage of previous CRH flipping status: {(statusFlips / prevCrh)}") + + return (statusFlips / prevCrh) > pctFlips + + +def _get_scorers( + seed: Optional[int], pseudoraters: Optional[bool], enabledScorers: Optional[Set[Scorers]] +) -> Dict[Scorers, List[Scorer]]: """Instantiate all Scorer objects which should be used for note ranking. Args: seed (int, optional): if not None, base distinct seeds for the first and second MF rounds on this value pseudoraters (bool, optional): if True, compute optional pseudorater confidence intervals + enabledScorers: if not None, set of which scorers should be instantiated and enabled Returns: - List[Scorer] containing instantiated Scorer objects for note ranking. + Dict[Scorers, List[Scorer]] containing instantiated Scorer objects for note ranking. """ - return [MFCoreScorer(seed, pseudoraters), MFExpansionScorer(seed), MFCoverageScorer(seed)] + scorers: Dict[Scorers, List[Scorer]] = dict() + + if enabledScorers is None or Scorers.MFCoreScorer in enabledScorers: + scorers[Scorers.MFCoreScorer] = [MFCoreScorer(seed, pseudoraters)] + if enabledScorers is None or Scorers.MFExpansionScorer in enabledScorers: + scorers[Scorers.MFExpansionScorer] = [MFExpansionScorer(seed)] + if enabledScorers is None or Scorers.MFGroupScorer in enabledScorers: + # Note that index 0 is reserved, corresponding to no group assigned, so scoring group + # numbers begin with index 1. + scorers[Scorers.MFGroupScorer] = [ + MFGroupScorer(groupNumber=i) for i in range(1, groupScorerCount + 1) + ] + + return scorers def _merge_results( @@ -94,13 +167,42 @@ def _run_scorer_parallelizable(scorer, ratings, noteStatusHistory, userEnrollmen runCounter = 0 scorerStartTime = time.perf_counter() result = None - - if isinstance(scorer, (MFBaseScorer, MFCoreScorer, MFCoverageScorer, MFExpansionScorer)): - while (len(scorer._mfRanker.train_errors) == 0) or ( - scorer._mfRanker.train_errors[-1] > c.maxTrainError + flips = False + + # TODO: encapsulate this at a lower level so we're not accessing private state and can deal + # with the case where there are no ratings for a scorer to run on after filtering. + if isinstance(scorer, (MFBaseScorer, MFCoreScorer, MFExpansionScorer)): + while ( + (len(scorer._mfRanker.train_errors) == 0) + or (scorer._mfRanker.train_errors[-1] > c.maxTrainError) + or (flips and runCounter < c.maxReruns) ): runCounter += 1 result = ModelResult(*scorer.score(ratings, noteStatusHistory, userEnrollment)) + if runCounter > c.maxReruns: + break + # check for notes createdat > lock time, not more than x% have changed status from nsh + if isinstance(scorer, MFExpansionScorer): + flips = _check_flips( + result.scoredNotes, + noteStatusHistory, + userEnrollment, + [c.expansion], + c.expansionFlipPct, + c.expansionRatingStatusKey, + ) + elif isinstance(scorer, MFCoreScorer): + flips = _check_flips( + result.scoredNotes, + noteStatusHistory, + userEnrollment, + [c.core], + c.coreFlipPct, + c.coreRatingStatusKey, + ) + if flips: + if scorer._seed is not None: + scorer._seed = scorer._seed + 1 else: result = ModelResult(*scorer.score(ratings, noteStatusHistory, userEnrollment)) runCounter += 1 @@ -178,12 +280,17 @@ def _run_scorers( modelResult.helpfulnessScores, modelResult.auxiliaryNoteInfo, ) + scoredNotes, helpfulnessScores = coalesce_group_models(scoredNotes, helpfulnessScores) return scoredNotes, helpfulnessScores, auxiliaryNoteInfo def meta_score( - scoredNotes: pd.DataFrame, auxiliaryNoteInfo: pd.DataFrame, lockedStatus: pd.DataFrame + scorers: Dict[Scorers, List[Scorer]], + scoredNotes: pd.DataFrame, + auxiliaryNoteInfo: pd.DataFrame, + lockedStatus: pd.DataFrame, + enabledScorers: Optional[Set[Scorers]], ) -> Tuple[pd.DataFrame, pd.DataFrame]: """Determine final note status based on individual scoring results. @@ -196,6 +303,7 @@ def meta_score( scoredNotes: pd.DataFrame containing all scored note results. auxiliaryNoteInfo: pd.DataFrame containing tag aggregates lockedStatus: pd.DataFrame containing {noteId, status} pairs for all notes + enabledScorers: if not None, set of which scorers should be instantiated and enabled Returns: Tuple[pd.DataFrame, pd.DataFrame]: @@ -209,32 +317,62 @@ def meta_score( on=c.noteIdKey, ) assert len(scoredNotes) == len(auxiliaryNoteInfo) - rules = [ - scoring_rules.DefaultRule(RuleID.META_INITIAL_NMR, set(), c.needsMoreRatings), - scoring_rules.ApplyModelResult( - RuleID.EXPANSION_MODEL, {RuleID.META_INITIAL_NMR}, c.expansionRatingStatusKey - ), - scoring_rules.ApplyModelResult( - RuleID.CORE_MODEL, {RuleID.EXPANSION_MODEL}, c.coreRatingStatusKey - ), - scoring_rules.ApplyAdditiveModelResult( - RuleID.COVERAGE_MODEL, {RuleID.CORE_MODEL}, c.coverageRatingStatusKey, 0.38 - ), - scoring_rules.ScoringDriftGuard(RuleID.SCORING_DRIFT_GUARD, {RuleID.CORE_MODEL}, lockedStatus), - # TODO: The rule below both sets tags for notes which are CRH / CRNH and unsets status for - # any notes which are CRH / CRNH but don't have enough ratings to assign two tags. The later - # behavior can lead to unsetting locked status. We should refactor this code to (1) remove - # the behavior which unsets status (instead tags will be assigned on a best effort basis) and - # (2) set tags in logic which is not run as a ScoringRule (since ScoringRules function to - # update note status). - scoring_rules.InsufficientExplanation( - RuleID.INSUFFICIENT_EXPLANATION, - {RuleID.CORE_MODEL}, - c.needsMoreRatings, - c.minRatingsToGetTag, - c.minTagsNeededForStatus, - ), + rules: List[scoring_rules.ScoringRule] = [ + scoring_rules.DefaultRule(RuleID.META_INITIAL_NMR, set(), c.needsMoreRatings) ] + # Only attach meta-scoring rules for models which actually run. + if enabledScorers is None or Scorers.MFExpansionScorer in enabledScorers: + rules.append( + scoring_rules.ApplyModelResult( + RuleID.EXPANSION_MODEL, {RuleID.META_INITIAL_NMR}, c.expansionRatingStatusKey + ) + ) + if enabledScorers is None or Scorers.MFCoreScorer in enabledScorers: + rules.append( + scoring_rules.ApplyModelResult( + RuleID.CORE_MODEL, {RuleID.META_INITIAL_NMR}, c.coreRatingStatusKey + ) + ) + if enabledScorers is None or Scorers.MFGroupScorer in enabledScorers: + # TODO: modify this code to work when MFExpansionScorer is disabled by the system test + assert len(scorers[Scorers.MFCoreScorer]) == 1 + assert len(scorers[Scorers.MFExpansionScorer]) == 1 + coreScorer = scorers[Scorers.MFCoreScorer][0] + assert isinstance(coreScorer, MFCoreScorer) + expnasionScorer = scorers[Scorers.MFExpansionScorer][0] + assert isinstance(expnasionScorer, MFExpansionScorer) + coreCrhThreshold = coreScorer.get_crh_threshold() + expansionCrhThreshold = expnasionScorer.get_crh_threshold() + for i in range(1, groupScorerCount + 1): + rules.append( + scoring_rules.ApplyGroupModelResult( + RuleID[f"GROUP_MODEL_{i}"], + {RuleID.EXPANSION_MODEL, RuleID.CORE_MODEL}, + i, + coreCrhThreshold, + expansionCrhThreshold, + ) + ) + rules.extend( + [ + scoring_rules.ScoringDriftGuard( + RuleID.SCORING_DRIFT_GUARD, {RuleID.CORE_MODEL}, lockedStatus + ), + # TODO: The rule below both sets tags for notes which are CRH / CRNH and unsets status for + # any notes which are CRH / CRNH but don't have enough ratings to assign two tags. The later + # behavior can lead to unsetting locked status. We should refactor this code to (1) remove + # the behavior which unsets status (instead tags will be assigned on a best effort basis) and + # (2) set tags in logic which is not run as a ScoringRule (since ScoringRules function to + # update note status). + scoring_rules.InsufficientExplanation( + RuleID.INSUFFICIENT_EXPLANATION, + {RuleID.CORE_MODEL}, + c.needsMoreRatings, + c.minRatingsToGetTag, + c.minTagsNeededForStatus, + ), + ] + ) scoredNotes[c.firstTagKey] = np.nan scoredNotes[c.secondTagKey] = np.nan scoringResult = scoring_rules.apply_scoring_rules( @@ -405,6 +543,26 @@ def _compute_helpfulness_scores( return helpfulnessScores +def _add_deprecated_columns(scoredNotes: pd.DataFrame) -> pd.DataFrame: + """Impute columns which are no longer used but must be maintained in output. + + Args: + scoredNotes: DataFrame containing note scoring output + + Returns: + scoredNotes augmented to include deprecated columns filled with dummy values + """ + for column, columnType in c.deprecatedNoteModelOutputTSVColumnsAndTypes: + assert column not in scoredNotes.columns + if columnType == np.double: + scoredNotes[column] = np.nan + elif columnType == np.str: + scoredNotes[column] = "" + else: + assert False, f"column type {columnType} unsupported" + return scoredNotes + + def _validate( scoredNotes: pd.DataFrame, helpfulnessScores: pd.DataFrame, @@ -447,6 +605,8 @@ def run_scoring( userEnrollment: pd.DataFrame, seed: Optional[int] = None, pseudoraters: Optional[bool] = True, + enabledScorers: Optional[Set[Scorers]] = None, + strictColumns: bool = True, ): """Invokes note scoring algorithms, merges results and computes user stats. @@ -456,6 +616,8 @@ def run_scoring( userEnrollment (pd.DataFrame): The enrollment state for each contributor seed (int, optional): if not None, base distinct seeds for the first and second MF rounds on this value pseudoraters (bool, optional): if True, compute optional pseudorater confidence intervals + enabledScorers (Set[Scorers], optional): Scorers which should be instantiated + strictColumns (bool, optional): if True, validate which columns are present Returns: Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]: @@ -465,9 +627,9 @@ def run_scoring( auxiliaryNoteInfo: one row per note containing adjusted and ratio tag values """ # Apply individual scoring models and obtained merged result. - scorers = _get_scorers(seed, pseudoraters) + scorers = _get_scorers(seed, pseudoraters, enabledScorers) scoredNotes, helpfulnessScores, auxiliaryNoteInfo = _run_scorers( - scorers, ratings, noteStatusHistory, userEnrollment + list(chain(*scorers.values())), ratings, noteStatusHistory, userEnrollment ) # Augment scoredNotes and auxiliaryNoteInfo with additional attributes for each note @@ -479,7 +641,11 @@ def run_scoring( # Assign final status to notes based on individual model scores and note attributes. scoredNotesCols, auxiliaryNoteInfoCols = meta_score( - scoredNotes, auxiliaryNoteInfo, noteStatusHistory[[c.noteIdKey, c.lockedStatusKey]] + scorers, + scoredNotes, + auxiliaryNoteInfo, + noteStatusHistory[[c.noteIdKey, c.lockedStatusKey]], + enabledScorers, ) scoredNotes = scoredNotes.merge(scoredNotesCols, on=c.noteIdKey) auxiliaryNoteInfo = auxiliaryNoteInfo.merge(auxiliaryNoteInfoCols, on=c.noteIdKey) @@ -505,5 +671,10 @@ def run_scoring( noteStatusHistory ), "noteStatusHistory should contain all notes after preprocessing" - # Finalize output dataframes with correct columns. - return _validate(scoredNotes, helpfulnessScores, newNoteStatusHistory, auxiliaryNoteInfo) + # Skip validation and selection out output columns if the set of scorers is overridden. + scoredNotes = _add_deprecated_columns(scoredNotes) + if strictColumns: + scoredNotes, helpfulnessScores, newNoteStatusHistory, auxiliaryNoteInfo = _validate( + scoredNotes, helpfulnessScores, newNoteStatusHistory, auxiliaryNoteInfo + ) + return scoredNotes, helpfulnessScores, newNoteStatusHistory, auxiliaryNoteInfo diff --git a/sourcecode/scoring/scorer.py b/sourcecode/scoring/scorer.py index 20d0ff8e..b01c42ba 100644 --- a/sourcecode/scoring/scorer.py +++ b/sourcecode/scoring/scorer.py @@ -58,6 +58,35 @@ def _filter_input( """ return ratings, noteStatusHistory + def _postprocess_output( + self, + noteScores: pd.DataFrame, + userScores: pd.DataFrame, + ratings: pd.DataFrame, + noteStatusHistory: pd.DataFrame, + userEnrollment: pd.DataFrame, + ) -> Tuple[pd.DataFrame, pd.DataFrame]: + """Prune noteScores and userScores and augment with any additional columns as necessary. + + Note that ratings, noteStatusHistory and userEnrollment are expected to be the *raw* + versions which were supplied to "score", not the version output after filtering. + Operating on the raw versions allows accurately computing statistics over the entire dataset + (e.g. fraction of users from a modeling group). + + Args: + noteScores (pd.DataFrame): scoring output for notes + userScores (pd.DataFrame): scoirng output for users + ratings (pd.DataFrame): preprocessed ratings + noteStatusHistory (pd.DataFrame): one row per note; history of when note had each status + userEnrollment (pd.DataFrame): one row per user specifying enrollment properties + + Returns: + Tuple[pd.DataFrame, pd.DataFrame]: + noteScores: note scoring output from _score_notes_and_users + userScores: user scoring output from _score_notes_and_users + """ + return noteScores, userScores + def _get_note_col_mapping(self) -> Dict[str, str]: """Returns a dict mapping default note column names to custom names for a specific model.""" return {} @@ -83,14 +112,17 @@ def _score_notes_and_users( """ def score( - self, ratings: pd.DataFrame, noteStatusHistory: pd.DataFrame, userEnrollment: pd.DataFrame + self, + ratingsRaw: pd.DataFrame, + noteStatusHistoryRaw: pd.DataFrame, + userEnrollmentRaw: pd.DataFrame, ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame], Optional[pd.DataFrame]]: """Process ratings to assign status to notes and optionally compute rater properties. Args: - ratings (pd.DataFrame): preprocessed ratings - noteStatusHistory (pd.DataFrame): one row per note; history of when note had each status - userEnrollment (pd.DataFrame): one row per user specifying enrollment properties + ratingsRaw (pd.DataFrame): preprocessed ratings + noteStatusHistoryRaw (pd.DataFrame): one row per note; history of when note had each status + userEnrollmentRaw (pd.DataFrame): one row per user specifying enrollment properties Returns: Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: @@ -99,10 +131,27 @@ def score( auxiliaryNoteInfo: one row per note containing adjusted and ratio tag values """ # Transform input, run core scoring algorithm, transform output. - ratings, noteStatusHistory = self._filter_input(ratings, noteStatusHistory, userEnrollment) + ratings, noteStatusHistory = self._filter_input( + ratingsRaw, noteStatusHistoryRaw, userEnrollmentRaw + ) + # If there are no ratings left after filtering, then return empty dataframes. + if len(ratings) == 0: + return ( + pd.DataFrame(columns=self.get_scored_notes_cols()), + pd.DataFrame(columns=self.get_helpfulness_scores_cols()) + if self.get_helpfulness_scores_cols() + else None, + pd.DataFrame(columns=self.get_auxiliary_note_info_cols()) + if self.get_auxiliary_note_info_cols() + else None, + ) noteScores, userScores = self._score_notes_and_users(ratings, noteStatusHistory) + noteScores, userScores = self._postprocess_output( + noteScores, userScores, ratingsRaw, noteStatusHistoryRaw, userEnrollmentRaw + ) noteScores = noteScores.rename(columns=self._get_note_col_mapping()) userScores = userScores.rename(columns=self._get_user_col_mapping()) + # TODO: Tolerate unexpcted columns if --nostrict-columns is set. # Process noteScores noteScores = noteScores.drop(columns=self._get_dropped_note_cols()) assert set(noteScores.columns) == set( @@ -112,7 +161,12 @@ def score( Missing expected columns that should've been in noteScores: {set(self.get_scored_notes_cols() + self.get_auxiliary_note_info_cols()) - set(noteScores.columns)}""" # Process userScores userScores = userScores.drop(columns=self._get_dropped_user_cols()) - assert set(userScores.columns) == set(self.get_helpfulness_scores_cols()) + assert set(userScores.columns) == set( + self.get_helpfulness_scores_cols() + ), f"""all columns must be either dropped or explicitly defined in an output. + Extra columns that were in userScores: {set(userScores.columns) - set(self.get_helpfulness_scores_cols())} + Missing expected columns that should've been in userScores: {set(self.get_helpfulness_scores_cols()) - set(userScores.columns)}""" + # Return dataframes with specified columns in specified order return ( noteScores[self.get_scored_notes_cols()], diff --git a/sourcecode/scoring/scoring_rules.py b/sourcecode/scoring/scoring_rules.py index 583b3b1a..ea9bda94 100644 --- a/sourcecode/scoring/scoring_rules.py +++ b/sourcecode/scoring/scoring_rules.py @@ -28,6 +28,7 @@ class RuleID(Enum): NM_CRNH = RuleAndVersion("NmCRNH", "1.0", False) GENERAL_CRH_INERTIA = RuleAndVersion("GeneralCRHInertia", "1.0", False) ELEVATED_CRH_INERTIA = RuleAndVersion("ElevatedCRHInertia", "1.0", False) + LCB_INERTIA = RuleAndVersion("LcbCRHInertia", "1.0", False) INCORRECT_OUTLIER = RuleAndVersion("FilterIncorrect", "1.0", False) # Rules used in _meta_score. @@ -35,6 +36,18 @@ class RuleID(Enum): EXPANSION_MODEL = RuleAndVersion("ExpansionModel", "1.1", False) CORE_MODEL = RuleAndVersion("CoreModel", "1.1", True) COVERAGE_MODEL = RuleAndVersion("CoverageModel", "1.1", False) + GROUP_MODEL_1 = RuleAndVersion("GroupModel01", "1.1", False) + GROUP_MODEL_2 = RuleAndVersion("GroupModel02", "1.1", False) + GROUP_MODEL_3 = RuleAndVersion("GroupModel03", "1.1", False) + GROUP_MODEL_4 = RuleAndVersion("GroupModel04", "1.1", False) + GROUP_MODEL_5 = RuleAndVersion("GroupModel05", "1.1", False) + GROUP_MODEL_6 = RuleAndVersion("GroupModel06", "1.1", False) + GROUP_MODEL_7 = RuleAndVersion("GroupModel07", "1.1", False) + GROUP_MODEL_8 = RuleAndVersion("GroupModel08", "1.1", False) + GROUP_MODEL_9 = RuleAndVersion("GroupModel09", "1.1", False) + GROUP_MODEL_10 = RuleAndVersion("GroupModel10", "1.1", False) + GROUP_MODEL_11 = RuleAndVersion("GroupModel11", "1.1", False) + GROUP_MODEL_12 = RuleAndVersion("GroupModel12", "1.1", False) INSUFFICIENT_EXPLANATION = RuleAndVersion("InsufficientExplanation", "1.0", True) SCORING_DRIFT_GUARD = RuleAndVersion("ScoringDriftGuard", "1.0", False) @@ -185,56 +198,6 @@ def score_notes( return (noteStatusUpdates, None) -class ApplyAdditiveModelResult(ScoringRule): - def __init__( - self, - ruleID: RuleID, - dependencies: Set[RuleID], - sourceColumn: str, - coreThreshold: float, - ): - """Set CRH status based on probationary model subject to safeguard threshold on core model. - - This rule sets CRH note status based on probationary models subject to several criteria: - * The note must be have CRH status from the probationary model. - * The note must currently be scored as NMR. This criteria guarantees that (1) probationary - models strictly expand coverage and (2) notes which were rated CRH by the core - model never have the decidedBy field overwritten by a less confident model. - * The note must score above a defined threshold on the core model. This criteria acts as - safeguard against dangerous detections from probationary models. - - Args: - rule: enum corresponding to a namedtuple defining a rule name and version string for the ScoringRule. - dependencies: Rules which must run before this rule can run. - sourceColumn: column containing note status (CRH, CRNH, NMR) to propagate to output. - coreThreshold: minimum score which notes must receive from the core model. - """ - super().__init__(ruleID, dependencies) - self._sourceColumn = sourceColumn - self._coreThreshold = coreThreshold - - def score_notes( - self, noteStats: pd.DataFrame, currentLabels: pd.DataFrame, statusColumn: str - ) -> (Tuple[pd.DataFrame, Optional[pd.DataFrame]]): - """Flip notes from NMR to CRH based on probationary model and subject to core model safeguard.""" - # Identify notes which meet each of the 3 criteria. - probationaryCRHNotes = noteStats[noteStats[self._sourceColumn] == c.currentlyRatedHelpful][ - [c.noteIdKey] - ] - currentNMRNotes = currentLabels[currentLabels[statusColumn] == c.needsMoreRatings][ - [c.noteIdKey] - ] - aboveSafeGuard = noteStats[noteStats[c.coreNoteInterceptKey] > self._coreThreshold][ - [c.noteIdKey] - ] - # Identify overlap and return status update. - noteStatusUpdates = probationaryCRHNotes.merge( - currentNMRNotes, on=c.noteIdKey, how="inner" - ).merge(aboveSafeGuard, on=c.noteIdKey, how="inner") - noteStatusUpdates[statusColumn] = c.currentlyRatedHelpful - return (noteStatusUpdates, None) - - class FilterTagOutliers(ScoringRule): def __init__( self, @@ -357,6 +320,92 @@ def score_notes( return (noteStatusUpdates, None) +class ApplyGroupModelResult(ScoringRule): + def __init__( + self, + ruleID: RuleID, + dependencies: Set[RuleID], + groupNumber: int, + coreCrhThreshold: float, + expansionCrhThreshold: float, + minSafeguardThreshold: float = 0.3, + ): + """Set CRH status based on a modeling group result. + + This rule sets CRH note status based on group models subject to several criteria: + * The note must be have CRH status from the group model. + * The note must currently be scored as NMR. This criteria guarantees that (1) group + models strictly expand coverage and (2) notes which were rated CRH by the core + model never have the decidedBy field overwritten by a less confident model. + * The note must have an intercept from either the core or expansion models, and the intercept + of the most confident model must fall within a defined range. We construct the range + to guarantee we can avoid CRHing notes which substantially lacked broad appeal, and + to guarantee that we will not CRH a note which was blocked by tag or inaccuracy filtering + from either the core or expansion models, as applicable. + + Args: + ruleID: enum corresponding to a namedtuple defining a rule name and version string for the ScoringRule. + dependencies: Rules which must run before this rule can run. + groupNumber: modeling group index which this instance of ApplyGroupModelResult should act on. + coreCrhThreshold: maximum intercept allowed on core model for group model CRH notes. + expansionCrhThreshold: maximum intercept allowed on expansion model for group model CRH notes. + minSafeguardThreshold: minimum intercept for core or expansion model. + """ + super().__init__(ruleID, dependencies) + self._groupNumber = groupNumber + self._minSafeguardThreshold = minSafeguardThreshold + self._coreCrhThreshold = coreCrhThreshold + self._expansionCrhThreshold = expansionCrhThreshold + + def score_notes( + self, noteStats: pd.DataFrame, currentLabels: pd.DataFrame, statusColumn: str + ) -> (Tuple[pd.DataFrame, Optional[pd.DataFrame]]): + """Flip notes from NMR to CRH based on group models and subject to core/expansion model safeguards.""" + # Identify notes which were CRH from the applicable group model. + probationaryCRHNotes = noteStats[ + (noteStats[c.groupRatingStatusKey] == c.currentlyRatedHelpful) + & (noteStats[c.modelingGroupKey] == self._groupNumber) + ][[c.noteIdKey]] + # Identify notes which are currently NMR. + currentNMRNotes = currentLabels[currentLabels[statusColumn] == c.needsMoreRatings][ + [c.noteIdKey] + ] + # Identify notes which pass score bound checks for expansion and core models. + noteStats = noteStats[[c.noteIdKey, c.coreNoteInterceptKey, c.expansionNoteInterceptKey]].copy() + noteStats["core"] = (noteStats[c.coreNoteInterceptKey] < self._coreCrhThreshold) & ( + noteStats[c.coreNoteInterceptKey] > self._minSafeguardThreshold + ) + noteStats.loc[noteStats[c.coreNoteInterceptKey].isna(), "core"] = np.nan + noteStats["expansion"] = ( + noteStats[c.expansionNoteInterceptKey] < self._expansionCrhThreshold + ) & (noteStats[c.expansionNoteInterceptKey] > self._minSafeguardThreshold) + noteStats.loc[noteStats[c.expansionNoteInterceptKey].isna(), "expansion"] = np.nan + + def _get_value(row): + idx = row.first_valid_index() + # If either core or expansion had an intercept then return whether it was in the valid + # range. If neither had an intercept, return False. Preference is given to core due + # to the ordering when selecting columns from noteStats below. + if idx is None: + return False + elif row[idx] == 1.0: + return True + elif row[idx] == 0.0: + return False + else: + assert False, f"unexpected value: {row[idx]}" + + noteStats["actionable"] = noteStats[["core", "expansion"]].apply(_get_value, axis=1) + actionableNotes = noteStats[noteStats["actionable"]][[c.noteIdKey]] + + # Identify overlap and return status update. + noteStatusUpdates = probationaryCRHNotes.merge( + currentNMRNotes, on=c.noteIdKey, how="inner" + ).merge(actionableNotes, on=c.noteIdKey, how="inner") + noteStatusUpdates[statusColumn] = c.currentlyRatedHelpful + return (noteStatusUpdates, None) + + class InsufficientExplanation(ScoringRule): def __init__( self, From fed8763ce29ba62e5af5820e6aad62f8d1bf6ee7 Mon Sep 17 00:00:00 2001 From: Brad Miller Date: Fri, 28 Jul 2023 10:05:13 -0700 Subject: [PATCH 2/2] merge conflict resolutions --- sourcecode/scoring/enums.py | 5 --- sourcecode/scoring/run_scoring.py | 63 ------------------------------- 2 files changed, 68 deletions(-) diff --git a/sourcecode/scoring/enums.py b/sourcecode/scoring/enums.py index 623dcf0b..2e92df1f 100644 --- a/sourcecode/scoring/enums.py +++ b/sourcecode/scoring/enums.py @@ -6,15 +6,10 @@ class Scorers(Enum): """Exhaustive list of all scorers to simplify setting enabled/disabled scorers.""" MFCoreScorer = auto() -<<<<<<< HEAD MFExpansionScorer = auto() # Note that the MFGroupScorer value controls whether *all* group scorers are instantiated, # not just a single MFGroupScorer instance. MFGroupScorer = auto() -======= - MFCoverageScorer = auto() - MFExpansionScorer = auto() ->>>>>>> main def scorers_from_csv(csv: str) -> Set[Scorers]: diff --git a/sourcecode/scoring/run_scoring.py b/sourcecode/scoring/run_scoring.py index e92e9b72..07c258a0 100644 --- a/sourcecode/scoring/run_scoring.py +++ b/sourcecode/scoring/run_scoring.py @@ -10,20 +10,12 @@ from itertools import chain import multiprocessing import time -<<<<<<< HEAD from typing import Dict, List, Optional, Set, Tuple -======= -from typing import List, Optional, Set, Tuple ->>>>>>> main from . import constants as c, contributor_state, note_ratings, note_status_history, scoring_rules from .enums import Scorers from .mf_base_scorer import MFBaseScorer from .mf_core_scorer import MFCoreScorer -<<<<<<< HEAD -======= -from .mf_coverage_scorer import MFCoverageScorer, MFDummyCoverageScorer ->>>>>>> main from .mf_expansion_scorer import MFExpansionScorer from .mf_group_scorer import coalesce_group_models, groupScorerCount, MFGroupScorer from .scorer import Scorer @@ -72,10 +64,6 @@ def _check_flips( unlockedPopNsh = unlockedPopNsh.merge( scoredNotes[[c.noteIdKey, resultCol]], on=c.noteIdKey, how="inner" ) -<<<<<<< HEAD - -======= ->>>>>>> main statusFlips = len( unlockedPopNsh.loc[ (unlockedPopNsh[c.currentLabelKey] != unlockedPopNsh[resultCol]) @@ -89,10 +77,6 @@ def _check_flips( prevCrh = max( 1, len(unlockedPopNsh.loc[unlockedPopNsh[c.currentLabelKey] == c.currentlyRatedHelpful]) ) -<<<<<<< HEAD -======= - ->>>>>>> main print(f"Percentage of previous CRH flipping status: {(statusFlips / prevCrh)}") return (statusFlips / prevCrh) > pctFlips @@ -100,11 +84,7 @@ def _check_flips( def _get_scorers( seed: Optional[int], pseudoraters: Optional[bool], enabledScorers: Optional[Set[Scorers]] -<<<<<<< HEAD ) -> Dict[Scorers, List[Scorer]]: -======= -) -> List[Scorer]: ->>>>>>> main """Instantiate all Scorer objects which should be used for note ranking. Args: @@ -115,7 +95,6 @@ def _get_scorers( Returns: Dict[Scorers, List[Scorer]] containing instantiated Scorer objects for note ranking. """ -<<<<<<< HEAD scorers: Dict[Scorers, List[Scorer]] = dict() if enabledScorers is None or Scorers.MFCoreScorer in enabledScorers: @@ -128,18 +107,6 @@ def _get_scorers( scorers[Scorers.MFGroupScorer] = [ MFGroupScorer(groupNumber=i) for i in range(1, groupScorerCount + 1) ] -======= - scorers: List[Scorer] = [] - - if enabledScorers is None or Scorers.MFCoreScorer in enabledScorers: - scorers.append(MFCoreScorer(seed, pseudoraters)) - if enabledScorers is None or Scorers.MFExpansionScorer in enabledScorers: - scorers.append(MFExpansionScorer(seed)) - if enabledScorers is not None and Scorers.MFCoverageScorer in enabledScorers: - scorers.append(MFCoverageScorer(seed)) - else: - scorers.append(MFDummyCoverageScorer()) ->>>>>>> main return scorers @@ -201,15 +168,9 @@ def _run_scorer_parallelizable(scorer, ratings, noteStatusHistory, userEnrollmen result = None flips = False -<<<<<<< HEAD # TODO: encapsulate this at a lower level so we're not accessing private state and can deal # with the case where there are no ratings for a scorer to run on after filtering. if isinstance(scorer, (MFBaseScorer, MFCoreScorer, MFExpansionScorer)): -======= - if isinstance( - scorer, (MFBaseScorer, MFCoreScorer, MFCoverageScorer, MFExpansionScorer) - ) and not isinstance(scorer, MFDummyCoverageScorer): ->>>>>>> main while ( (len(scorer._mfRanker.train_errors) == 0) or (scorer._mfRanker.train_errors[-1] > c.maxTrainError) @@ -217,11 +178,8 @@ def _run_scorer_parallelizable(scorer, ratings, noteStatusHistory, userEnrollmen ): runCounter += 1 result = ModelResult(*scorer.score(ratings, noteStatusHistory, userEnrollment)) -<<<<<<< HEAD if runCounter > c.maxReruns: break -======= ->>>>>>> main # check for notes createdat > lock time, not more than x% have changed status from nsh if isinstance(scorer, MFExpansionScorer): flips = _check_flips( @@ -232,7 +190,6 @@ def _run_scorer_parallelizable(scorer, ratings, noteStatusHistory, userEnrollmen c.expansionFlipPct, c.expansionRatingStatusKey, ) -<<<<<<< HEAD elif isinstance(scorer, MFCoreScorer): flips = _check_flips( result.scoredNotes, @@ -245,8 +202,6 @@ def _run_scorer_parallelizable(scorer, ratings, noteStatusHistory, userEnrollmen if flips: if scorer._seed is not None: scorer._seed = scorer._seed + 1 -======= ->>>>>>> main else: result = ModelResult(*scorer.score(ratings, noteStatusHistory, userEnrollment)) runCounter += 1 @@ -330,10 +285,7 @@ def _run_scorers( def meta_score( -<<<<<<< HEAD scorers: Dict[Scorers, List[Scorer]], -======= ->>>>>>> main scoredNotes: pd.DataFrame, auxiliaryNoteInfo: pd.DataFrame, lockedStatus: pd.DataFrame, @@ -380,7 +332,6 @@ def meta_score( RuleID.CORE_MODEL, {RuleID.META_INITIAL_NMR}, c.coreRatingStatusKey ) ) -<<<<<<< HEAD if enabledScorers is None or Scorers.MFGroupScorer in enabledScorers: # TODO: modify this code to work when MFExpansionScorer is disabled by the system test assert len(scorers[Scorers.MFCoreScorer]) == 1 @@ -401,14 +352,6 @@ def meta_score( expansionCrhThreshold, ) ) -======= - if enabledScorers is not None and Scorers.MFCoverageScorer in enabledScorers: - rules.append( - scoring_rules.ApplyAdditiveModelResult( - RuleID.COVERAGE_MODEL, {RuleID.META_INITIAL_NMR}, c.coverageRatingStatusKey, 0.38 - ) - ) ->>>>>>> main rules.extend( [ scoring_rules.ScoringDriftGuard( @@ -697,10 +640,7 @@ def run_scoring( # Assign final status to notes based on individual model scores and note attributes. scoredNotesCols, auxiliaryNoteInfoCols = meta_score( -<<<<<<< HEAD scorers, -======= ->>>>>>> main scoredNotes, auxiliaryNoteInfo, noteStatusHistory[[c.noteIdKey, c.lockedStatusKey]], @@ -731,10 +671,7 @@ def run_scoring( ), "noteStatusHistory should contain all notes after preprocessing" # Skip validation and selection out output columns if the set of scorers is overridden. -<<<<<<< HEAD scoredNotes = _add_deprecated_columns(scoredNotes) -======= ->>>>>>> main if strictColumns: scoredNotes, helpfulnessScores, newNoteStatusHistory, auxiliaryNoteInfo = _validate( scoredNotes, helpfulnessScores, newNoteStatusHistory, auxiliaryNoteInfo