From cf3dd25175597bd25ab3b58fec9a582a96b8e872 Mon Sep 17 00:00:00 2001
From: Brad Miller <bradm@twitter.com>
Date: Thu, 27 Jul 2023 09:34:37 -0700
Subject: [PATCH 1/2] group modling and other assorted updates

---
 sourcecode/main.py                         |  25 +-
 sourcecode/scoring/constants.py            |  86 +++++--
 sourcecode/scoring/enums.py                |  33 +++
 sourcecode/scoring/incorrect_filter.py     |   4 +-
 sourcecode/scoring/matrix_factorization.py | 147 ++++++++---
 sourcecode/scoring/mf_base_scorer.py       |   7 +-
 sourcecode/scoring/mf_coverage_scorer.py   |  76 ------
 sourcecode/scoring/mf_group_scorer.py      | 285 +++++++++++++++++++++
 sourcecode/scoring/note_ratings.py         |  21 +-
 sourcecode/scoring/process_data.py         |  42 ++-
 sourcecode/scoring/run_scoring.py          | 251 +++++++++++++++---
 sourcecode/scoring/scorer.py               |  66 ++++-
 sourcecode/scoring/scoring_rules.py        | 149 +++++++----
 13 files changed, 956 insertions(+), 236 deletions(-)
 create mode 100644 sourcecode/scoring/enums.py
 delete mode 100644 sourcecode/scoring/mf_coverage_scorer.py
 create mode 100644 sourcecode/scoring/mf_group_scorer.py

diff --git a/sourcecode/main.py b/sourcecode/main.py
index a10a68ba..fcf3bd75 100644
--- a/sourcecode/main.py
+++ b/sourcecode/main.py
@@ -14,6 +14,7 @@
 import os
 
 import scoring.constants as c
+from scoring.enums import scorers_from_csv
 from scoring.process_data import get_data, write_tsv_local
 from scoring.run_scoring import run_scoring
 
@@ -59,12 +60,28 @@ def parse_args():
   )
   parser.set_defaults(pseudoraters=True)
   parser.add_argument("-r", "--ratings", default=c.ratingsInputPath, help="rating dataset")
+  parser.add_argument(
+    "--scorers", default=None, type=scorers_from_csv, help="CSV list of scorers to enable."
+  )
   parser.add_argument(
     "--seed", default=None, type=int, help="set to an int to seed matrix factorization"
   )
   parser.add_argument(
     "-s", "--status", default=c.noteStatusHistoryInputPath, help="note status history dataset"
   )
+  parser.add_argument(
+    "--strict-columns",
+    dest="strict_columns",
+    help="Explicitly select columns and require that expected columns are present.",
+    action="store_true",
+  )
+  parser.add_argument(
+    "--nostrict-columns",
+    help="Disable validation of expected columns and allow unexpected columns.",
+    action="store_false",
+    dest="strict_columns",
+  )
+  parser.set_defaults(strict_columns=True)
 
   return parser.parse_args()
 
@@ -82,7 +99,13 @@ def main():
 
   # Invoke scoring and user contribution algorithms.
   scoredNotes, helpfulnessScores, newStatus, auxNoteInfo = run_scoring(
-    ratings, statusHistory, userEnrollment, seed=args.seed, pseudoraters=args.pseudoraters
+    ratings,
+    statusHistory,
+    userEnrollment,
+    seed=args.seed,
+    pseudoraters=args.pseudoraters,
+    enabledScorers=args.scorers,
+    strictColumns=args.strict_columns,
   )
 
   # Write outputs to local disk.
diff --git a/sourcecode/scoring/constants.py b/sourcecode/scoring/constants.py
index 497c3d44..51f213bc 100644
--- a/sourcecode/scoring/constants.py
+++ b/sourcecode/scoring/constants.py
@@ -1,6 +1,7 @@
 import time
 
 import numpy as np
+import pandas as pd
 
 
 # Store the timestamp at which the constants module is initialized.  Note
@@ -15,6 +16,10 @@
 
 maxTrainError = 0.09
 
+coreFlipPct = 0.15
+expansionFlipPct = 0.19
+maxReruns = 5
+
 # Explanation Tags
 minRatingsToGetTag = 2
 minTagsNeededForStatus = 2
@@ -37,6 +42,7 @@
 summaryKey = "summary"
 authorTopNotHelpfulTagValues = "authorTopNotHelpfulTagValues"
 modelingPopulationKey = "modelingPopulation"
+modelingGroupKey = "modelingGroup"
 
 # TSV Values
 notHelpfulValueTsv = "NOT_HELPFUL"
@@ -123,6 +129,14 @@ def rater_factor_key(i):
 coverageRatingStatusKey = "coverageRatingStatus"
 coverageNoteInterceptMaxKey = "coverageNoteInterceptMax"
 coverageNoteInterceptMinKey = "coverageNoteInterceptMin"
+# Group Model
+groupNoteInterceptKey = "groupNoteIntercept"
+groupNoteFactor1Key = "groupNoteFactor1"
+groupRatingStatusKey = "groupRatingStatus"
+groupNoteInterceptMaxKey = "groupNoteInterceptMax"
+groupNoteInterceptMinKey = "groupNoteInterceptMin"
+groupRaterInterceptKey = "groupRaterIntercept"
+groupRaterFactor1Key = "groupRaterFactor1"
 
 # Ids and Indexes
 noteIdKey = "noteId"
@@ -216,12 +230,10 @@ def rater_factor_key(i):
 
 incorrectFilterColumns = [
   "notHelpfulIncorrect_interval",
-  "notHelpfulIncorrect_total_interval",
   "cnt_interval",
   "num_voters_interval",
   "tf_idf_incorrect_interval",
   "notHelpfulIncorrect_same",
-  "notHelpfulIncorrect_total_same",
   "cnt_same",
   "num_voters_same",
   "tf_idf_incorrect_same",
@@ -261,11 +273,7 @@ def rater_factor_key(i):
   ]
   + misleadingTagsAndTypes
   + notMisleadingTagsAndTypes
-  + [
-    ("trustworthySources", np.int64),
-    (summaryKey, np.object),
-    ("isMediaNote", np.int64)
-  ]
+  + [("trustworthySources", np.int64), (summaryKey, np.object), ("isMediaNote", np.int64)]
 )
 noteTSVColumns = [col for (col, dtype) in noteTSVColumnsAndTypes]
 noteTSVTypes = [dtype for (col, dtype) in noteTSVColumnsAndTypes]
@@ -366,6 +374,16 @@ def rater_factor_key(i):
 userEnrollmentTSVTypes = [dtype for (_, dtype) in userEnrollmentTSVColumnsAndTypes]
 userEnrollmentTSVTypeMapping = {col: dtype for (col, dtype) in userEnrollmentTSVColumnsAndTypes}
 
+# TODO: delete expanded user enrollment definition once modeling group is fully rolled out
+userEnrollmentExpandedTSVColumnsAndTypes = userEnrollmentTSVColumnsAndTypes + [
+  (modelingGroupKey, np.float64)
+]
+userEnrollmentExpandedTSVColumns = [col for (col, _) in userEnrollmentExpandedTSVColumnsAndTypes]
+userEnrollmentExpandedTSVTypes = [dtype for (_, dtype) in userEnrollmentExpandedTSVColumnsAndTypes]
+userEnrollmentExpandedTSVTypeMapping = {
+  col: dtype for (col, dtype) in userEnrollmentExpandedTSVColumnsAndTypes
+}
+
 noteInterceptMaxKey = "internalNoteIntercept_max"
 noteInterceptMinKey = "internalNoteIntercept_min"
 noteParameterUncertaintyTSVMainColumnsAndTypes = [
@@ -423,6 +441,16 @@ def rater_factor_key(i):
   + incorrectFilterColumns
 )
 
+deprecatedNoteModelOutputColumns = frozenset(
+  {
+    coverageNoteInterceptKey,
+    coverageNoteFactor1Key,
+    coverageRatingStatusKey,
+    coverageNoteInterceptMinKey,
+    coverageNoteInterceptMaxKey,
+  }
+)
+
 noteModelOutputTSVColumnsAndTypes = [
   (noteIdKey, np.int64),
   (coreNoteInterceptKey, np.double),
@@ -453,9 +481,20 @@ def rater_factor_key(i):
   (expansionNoteInterceptMaxKey, np.double),
   (coverageNoteInterceptMinKey, np.double),
   (coverageNoteInterceptMaxKey, np.double),
+  (groupNoteInterceptKey, np.double),
+  (groupNoteFactor1Key, np.double),
+  (groupRatingStatusKey, np.str),
+  (groupNoteInterceptMaxKey, np.double),
+  (groupNoteInterceptMinKey, np.double),
+  (modelingGroupKey, np.float64),
 ]
 noteModelOutputTSVColumns = [col for (col, dtype) in noteModelOutputTSVColumnsAndTypes]
 noteModelOutputTSVTypeMapping = {col: dtype for (col, dtype) in noteModelOutputTSVColumnsAndTypes}
+deprecatedNoteModelOutputTSVColumnsAndTypes = [
+  (col, dtype)
+  for (col, dtype) in noteModelOutputTSVColumnsAndTypes
+  if col in deprecatedNoteModelOutputColumns
+]
 
 raterModelOutputTSVColumnsAndTypes = [
   (raterParticipantIdKey, np.int64),
@@ -464,25 +503,28 @@ def rater_factor_key(i):
   (crhCrnhRatioDifferenceKey, np.double),
   (meanNoteScoreKey, np.double),
   (raterAgreeRatioKey, np.double),
-  (successfulRatingHelpfulCount, np.int64),
-  (successfulRatingNotHelpfulCount, np.int64),
-  (successfulRatingTotal, np.int64),
-  (unsuccessfulRatingHelpfulCount, np.int64),
-  (unsuccessfulRatingNotHelpfulCount, np.int64),
-  (unsuccessfulRatingTotal, np.int64),
-  (ratingsAwaitingMoreRatings, np.int64),
-  (ratedAfterDecision, np.int64),
-  (notesCurrentlyRatedHelpful, np.int64),
-  (notesCurrentlyRatedNotHelpful, np.int64),
-  (notesAwaitingMoreRatings, np.int64),
+  (successfulRatingHelpfulCount, pd.Int64Dtype()),
+  (successfulRatingNotHelpfulCount, pd.Int64Dtype()),
+  (successfulRatingTotal, pd.Int64Dtype()),
+  (unsuccessfulRatingHelpfulCount, pd.Int64Dtype()),
+  (unsuccessfulRatingNotHelpfulCount, pd.Int64Dtype()),
+  (unsuccessfulRatingTotal, pd.Int64Dtype()),
+  (ratingsAwaitingMoreRatings, pd.Int64Dtype()),
+  (ratedAfterDecision, pd.Int64Dtype()),
+  (notesCurrentlyRatedHelpful, pd.Int64Dtype()),
+  (notesCurrentlyRatedNotHelpful, pd.Int64Dtype()),
+  (notesAwaitingMoreRatings, pd.Int64Dtype()),
   (enrollmentState, np.int32),
-  (successfulRatingNeededToEarnIn, np.int64),
+  (successfulRatingNeededToEarnIn, pd.Int64Dtype()),
   (authorTopNotHelpfulTagValues, np.str),
-  (timestampOfLastStateChange, np.int64),
-  (aboveHelpfulnessThresholdKey, np.bool_),
+  (timestampOfLastStateChange, np.double),
+  (aboveHelpfulnessThresholdKey, np.float64),  # nullable bool
   (isEmergingWriterKey, np.bool_),
-  (aggregateRatingReceivedTotal, np.int64),
+  (aggregateRatingReceivedTotal, pd.Int64Dtype()),
   (timestampOfLastEarnOut, np.double),
+  (groupRaterInterceptKey, np.double),
+  (groupRaterFactor1Key, np.double),
+  (modelingGroupKey, np.float64),
 ]
 raterModelOutputTSVColumns = [col for (col, dtype) in raterModelOutputTSVColumnsAndTypes]
 raterModelOutputTSVTypeMapping = {col: dtype for (col, dtype) in raterModelOutputTSVColumnsAndTypes}
diff --git a/sourcecode/scoring/enums.py b/sourcecode/scoring/enums.py
new file mode 100644
index 00000000..2e92df1f
--- /dev/null
+++ b/sourcecode/scoring/enums.py
@@ -0,0 +1,33 @@
+from enum import auto, Enum
+from typing import Set
+
+
+class Scorers(Enum):
+  """Exhaustive list of all scorers to simplify setting enabled/disabled scorers."""
+
+  MFCoreScorer = auto()
+  MFExpansionScorer = auto()
+  # Note that the MFGroupScorer value controls whether *all* group scorers are instantiated,
+  # not just a single MFGroupScorer instance.
+  MFGroupScorer = auto()
+
+
+def scorers_from_csv(csv: str) -> Set[Scorers]:
+  """Converts a CSV of enums to an actual set of Enum values.
+
+  Args:
+    csv: CSV string of Scorer names.
+
+  Returns:
+    Set containing Scorers.
+
+  Raises:
+    ValueError if csv contains a token which is not a valid Scorer.
+  """
+  values = []
+  for value in csv.split(","):
+    try:
+      values.append(Scorers[value])
+    except KeyError:
+      raise ValueError(f"Unknown value {value}")
+  return set(values)
diff --git a/sourcecode/scoring/incorrect_filter.py b/sourcecode/scoring/incorrect_filter.py
index 3706087b..f8e3e6c2 100644
--- a/sourcecode/scoring/incorrect_filter.py
+++ b/sourcecode/scoring/incorrect_filter.py
@@ -53,6 +53,8 @@ def _get_incorrect_tfidf_ratio(
     suffix: suffix for incorrect and count column names for this filter
 
   Returns:
+    pd.DataFrame with one row for each note, with computed sum(tf_idf_incorrect) score for raters
+    included in filter
   """
 
   ratings_w_user_totals = augmented_ratings[user_filter]
@@ -67,7 +69,7 @@ def _get_incorrect_tfidf_ratio(
   ) / np.log(
     1 + (rating_aggs_w_cnt["notHelpfulIncorrect_total"] / rating_aggs_w_cnt["cnt"])
   )  # p(incorrect over all rater ratings)
-  rating_aggs_w_cnt.drop("notHelpfulIncorrect_total", axis=1)
+  rating_aggs_w_cnt.drop("notHelpfulIncorrect_total", inplace=True, axis=1)
   rating_aggs_w_cnt.columns = [c.noteIdKey] + [
     f"{col}{suffix}" for col in rating_aggs_w_cnt.columns[1:]
   ]
diff --git a/sourcecode/scoring/matrix_factorization.py b/sourcecode/scoring/matrix_factorization.py
index 5ce33899..5bd1dac6 100644
--- a/sourcecode/scoring/matrix_factorization.py
+++ b/sourcecode/scoring/matrix_factorization.py
@@ -251,6 +251,72 @@ def _create_mf_model(
 
     return mf_model, optimizer, criterion
 
+  def _get_train_validate_sets(
+    self,
+    row: torch.LongTensor,
+    col: torch.LongTensor,
+    rating: torch.FloatTensor,
+    validate_percent: Optional[float] = None,
+  ) -> Tuple[
+    torch.LongTensor,
+    torch.LongTensor,
+    torch.FloatTensor,
+    torch.LongTensor,
+    torch.LongTensor,
+    torch.FloatTensor,
+  ]:
+    if validate_percent is not None:
+      random_indices = np.random.permutation(np.arange(len(row)))
+      validate_indices = random_indices[: int(validate_percent * len(row))]
+      train_indices = random_indices[int(validate_percent * len(row)) :]
+      row_validate = row[validate_indices]
+      col_validate = col[validate_indices]
+      rating_validate = rating[validate_indices]
+      row_train = row[train_indices]
+      col_train = col[train_indices]
+      rating_train = rating[train_indices]
+    else:
+      row_train = row
+      col_train = col
+      rating_train = rating
+      row_validate = None
+      col_validate = None
+      rating_validate = None
+    return row_train, col_train, rating_train, row_validate, col_validate, rating_validate
+
+  def _compute_and_print_loss(
+    self,
+    mf_model: BiasedMatrixFactorization,
+    loss_value: float,
+    criterion: torch.nn.modules.loss._Loss,
+    row_train: torch.LongTensor,
+    col_train: torch.LongTensor,
+    rating_train: torch.FloatTensor,
+    row_validate: Optional[torch.LongTensor],
+    col_validate: Optional[torch.LongTensor],
+    rating_validate: Optional[torch.FloatTensor],
+    epoch: int,
+    final: bool = False,
+  ) -> Tuple[float, float, Optional[float]]:
+    y_pred = mf_model(row_train, col_train)
+    train_loss_value = criterion(y_pred, rating_train).item()
+    if row_validate is not None:
+      y_pred_validate = mf_model(row_validate, col_validate)
+      validate_loss_value = criterion(y_pred_validate, rating_validate).item()
+    else:
+      validate_loss_value = None
+
+    if self._logging:
+      print("epoch", epoch, loss_value)
+      print("TRAIN FIT LOSS: ", train_loss_value)
+      if validate_loss_value is not None:
+        print("VALIDATE FIT LOSS: ", validate_loss_value)
+
+      if final == True:
+        self.test_errors.append(loss_value)
+        self.train_errors.append(train_loss_value)
+    return train_loss_value, loss_value, validate_loss_value
+
   def _fit_model(
     self,
     mf_model: BiasedMatrixFactorization,
@@ -259,7 +325,8 @@ def _fit_model(
     row: torch.LongTensor,
     col: torch.LongTensor,
     rating: torch.FloatTensor,
-  ) -> None:
+    validate_percent: Optional[float] = None,
+  ) -> Tuple[float, float, Optional[float]]:
     """Run gradient descent to train the model.
 
     Args:
@@ -270,23 +337,20 @@ def _fit_model(
         col (torch.LongTensor)
         rating (torch.FloatTensor)
     """
-    l2_lambda_intercept = self._l2_lambda * self._l2_intercept_multiplier
-
-    def print_loss(final=False):
-      y_pred = mf_model(row, col)
-      train_loss = criterion(y_pred, rating)
-
-      if self._logging:
-        print("epoch", epoch, loss.item())
-        print("TRAIN FIT LOSS: ", train_loss.item())
-        if final == True:
-          self.test_errors.append(loss.item())
-          self.train_errors.append(train_loss.item())
+    (
+      row_train,
+      col_train,
+      rating_train,
+      row_validate,
+      col_validate,
+      rating_validate,
+    ) = self._get_train_validate_sets(row, col, rating, validate_percent)
 
+    l2_lambda_intercept = self._l2_lambda * self._l2_intercept_multiplier
     prev_loss = 1e10
 
-    y_pred = mf_model(row, col)
-    loss = criterion(y_pred, rating)
+    y_pred = mf_model(row_train, col_train)
+    loss = criterion(y_pred, rating_train)
     l2_reg_loss = torch.tensor(0.0).to(mf_model.device)
 
     for name, param in mf_model.named_parameters():
@@ -299,7 +363,9 @@ def print_loss(final=False):
 
     epoch = 0
 
-    while abs(prev_loss - loss.item()) > self._convergence:
+    while (abs(loss.item() - prev_loss) > self._convergence) and (
+      not (epoch > 100 and loss.item() > prev_loss)
+    ):
 
       prev_loss = loss.item()
 
@@ -313,8 +379,8 @@ def print_loss(final=False):
       optimizer.zero_grad()
 
       # Predict and calculate loss
-      y_pred = mf_model(row, col)
-      loss = criterion(y_pred, rating)
+      y_pred = mf_model(row_train, col_train)
+      loss = criterion(y_pred, rating_train)
       l2_reg_loss = torch.tensor(0.0).to(mf_model.device)
 
       for name, param in mf_model.named_parameters():
@@ -326,13 +392,37 @@ def print_loss(final=False):
       loss += l2_reg_loss
 
       if epoch % 50 == 0:
-        print_loss()
+        self._compute_and_print_loss(
+          mf_model,
+          loss.item(),
+          criterion,
+          row_train,
+          col_train,
+          rating_train,
+          row_validate,
+          col_validate,
+          rating_validate,
+          epoch,
+          final=False,
+        )
 
       epoch += 1
 
     if self._logging:
       print("Num epochs:", epoch)
-      print_loss(final=True)
+    return self._compute_and_print_loss(
+      mf_model,
+      loss.item(),
+      criterion,
+      row_train,
+      col_train,
+      rating_train,
+      row_validate,
+      col_validate,
+      rating_validate,
+      epoch,
+      final=True,
+    )
 
   def run_mf(
     self,
@@ -341,7 +431,8 @@ def run_mf(
     userInit: pd.DataFrame = None,
     globalInterceptInit: Optional[float] = None,
     specificNoteId: Optional[int] = None,
-  ) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[float]]:
+    validatePercent: Optional[float] = None,
+  ):
     """Train matrix factorization model.
 
     See https://twitter.github.io/communitynotes/ranking-notes/#matrix-factorization
@@ -387,13 +478,8 @@ def run_mf(
       row = torch.LongTensor(noteRatingIds[_raterIndexKey].values).to(mf_model.device)
       col = torch.LongTensor(noteRatingIds[_noteIndexKey].values).to(mf_model.device)
 
-    self._fit_model(
-      mf_model,
-      optimizer,
-      criterion,
-      row,
-      col,
-      rating,
+    train_loss, loss, validate_loss = self._fit_model(
+      mf_model, optimizer, criterion, row, col, rating, validatePercent
     )
 
     assert mf_model.item_factors.weight.data.cpu().numpy().shape[0] == noteIdMap.shape[0]
@@ -409,7 +495,10 @@ def run_mf(
     )
 
     fitRaterParams.drop(_raterIndexKey, axis=1, inplace=True)
-    return fitNoteParams, fitRaterParams, globalIntercept
+    if validatePercent is None:
+      return fitNoteParams, fitRaterParams, globalIntercept
+    else:
+      return fitNoteParams, fitRaterParams, globalIntercept, train_loss, loss, validate_loss
 
   def _flip_factors_for_identification(
     self, noteIdMap: pd.DataFrame, raterIdMap: pd.DataFrame
diff --git a/sourcecode/scoring/mf_base_scorer.py b/sourcecode/scoring/mf_base_scorer.py
index a005f7dc..d301ddab 100644
--- a/sourcecode/scoring/mf_base_scorer.py
+++ b/sourcecode/scoring/mf_base_scorer.py
@@ -30,7 +30,7 @@ def __init__(
     crnhThresholdNoteFactorMultiplier: float = -0.8,
     crnhThresholdNMIntercept: float = -0.15,
     crnhThresholdUCBIntercept: float = -0.04,
-    crhThresholdLCBIntercept: float = 0.31,
+    crhThresholdLCBIntercept: float = 0.32,
     crhSuperThreshold: float = 0.5,
     inertiaDelta: float = 0.01,
     weightedTotalVotes: float = 1.0,
@@ -87,6 +87,10 @@ def __init__(
     self._weightedTotalVotes = weightedTotalVotes
     self._mfRanker = matrix_factorization.MatrixFactorization()
 
+  def get_crh_threshold(self) -> float:
+    """Return CRH threshold for general scoring logic."""
+    return self._crhThreshold
+
   def get_scored_notes_cols(self) -> List[str]:
     """Returns a list of columns which should be present in the scoredNotes output."""
     return [
@@ -160,6 +164,7 @@ def _score_notes_and_users(
         userScores pd.DataFrame: one row per user containing a column for each helpfulness score.
     """
     if self._seed is not None:
+      print(f"seeding with {self._seed}")
       torch.manual_seed(self._seed)
 
     # Removes ratings where either (1) the note did not receive enough ratings, or
diff --git a/sourcecode/scoring/mf_coverage_scorer.py b/sourcecode/scoring/mf_coverage_scorer.py
deleted file mode 100644
index 8287bc78..00000000
--- a/sourcecode/scoring/mf_coverage_scorer.py
+++ /dev/null
@@ -1,76 +0,0 @@
-from typing import Dict, List, Optional
-
-from . import constants as c, matrix_factorization
-from .mf_core_scorer import MFCoreScorer
-
-
-class MFCoverageScorer(MFCoreScorer):
-  def __init__(self, seed: Optional[int] = None) -> None:
-    """Configure MFCoverageScorer object.
-
-    Args:
-      seed: if not None, seed value to ensure deterministic execution
-    """
-    super().__init__(seed)
-    self._crhThreshold = 0.338
-    self._pseudoraters = False
-    self._mfRanker = matrix_factorization.MatrixFactorization(l2_intercept_multiplier=7)
-
-  def _get_note_col_mapping(self) -> Dict[str, str]:
-    """Returns a dict mapping default note column names to custom names for a specific model."""
-    return {
-      c.internalNoteInterceptKey: c.coverageNoteInterceptKey,
-      c.internalNoteFactor1Key: c.coverageNoteFactor1Key,
-      c.internalRatingStatusKey: c.coverageRatingStatusKey,
-      c.noteInterceptMinKey: c.coverageNoteInterceptMinKey,
-      c.noteInterceptMaxKey: c.coverageNoteInterceptMaxKey,
-    }
-
-  def _get_user_col_mapping(self) -> Dict[str, str]:
-    """Returns a dict mapping default user column names to custom names for a specific model."""
-    return {}
-
-  def get_scored_notes_cols(self) -> List[str]:
-    """Returns a list of columns which should be present in the scoredNotes output."""
-    return [
-      c.noteIdKey,
-      c.coverageNoteInterceptKey,
-      c.coverageNoteFactor1Key,
-      c.coverageRatingStatusKey,
-      c.coverageNoteInterceptMinKey,
-      c.coverageNoteInterceptMaxKey,
-    ]
-
-  def get_helpfulness_scores_cols(self) -> List[str]:
-    """Returns a list of columns which should be present in the helpfulnessScores output."""
-    return []
-
-  def get_auxiliary_note_info_cols(self) -> List[str]:
-    """Returns a list of columns which should be present in the auxiliaryNoteInfo output."""
-    return []
-
-  def _get_dropped_note_cols(self) -> List[str]:
-    """Returns a list of columns which should be excluded from scoredNotes and auxiliaryNoteInfo."""
-    return super()._get_dropped_note_cols() + (
-      [
-        c.internalActiveRulesKey,
-        c.activeFilterTagsKey,
-        c.ratingWeightKey,
-      ]
-      + c.notHelpfulTagsAdjustedColumns
-      + c.notHelpfulTagsAdjustedRatioColumns
-      + c.incorrectFilterColumns
-      + c.noteParameterUncertaintyTSVAuxColumns
-    )
-
-  def _get_dropped_user_cols(self) -> List[str]:
-    """Returns a list of columns which should be excluded from helpfulnessScores output."""
-    return super()._get_dropped_user_cols() + [
-      c.raterParticipantIdKey,
-      c.internalRaterInterceptKey,
-      c.internalRaterFactor1Key,
-      c.crhCrnhRatioDifferenceKey,
-      c.meanNoteScoreKey,
-      c.raterAgreeRatioKey,
-      c.aboveHelpfulnessThresholdKey,
-    ]
diff --git a/sourcecode/scoring/mf_group_scorer.py b/sourcecode/scoring/mf_group_scorer.py
new file mode 100644
index 00000000..756c2047
--- /dev/null
+++ b/sourcecode/scoring/mf_group_scorer.py
@@ -0,0 +1,285 @@
+from typing import Dict, List, Optional, Tuple
+
+from . import constants as c
+from .mf_base_scorer import MFBaseScorer
+
+import numpy as np
+import pandas as pd
+
+
+# Number of MFGroupScorer objects we expect to instantiate
+groupScorerCount = 12
+
+
+def _coalesce_columns(df: pd.DataFrame, columnPrefix: str) -> pd.DataFrame:
+  """Condense all columns beginning with columnPrefix into a single column.
+
+  With each row there must be at most one column with a non-NaN value in the set of
+  columns beginning with columnPrefix.  If a non-NaN value is present that will
+  become the value in the condensed column, otherwise the value will be NaN.  After
+  column values are condensed the original (prefixed) columns will be dropped.
+
+  Args:
+    df: DataFrame containing columns to condense
+    collumnPrefix: Prefix used to detect columns to coalesce, and the name for
+      the output column.
+
+  Returns:
+    DataFrame with all columns prefixed by columnPrefix dropped and replaced by
+    a single column named columnPrefix
+
+  Raises:
+    AssertionError if multiple columns prefixed by columnPrefix have non-NaN values
+    for any row.
+  """
+  # Identify columns to coalesce
+  columns = [col for col in df.columns if col.startswith(f"{columnPrefix}_")]
+  if not columns:
+    return df
+  # Validate that at most one column is set, and store which rows have a column set
+  rowResults = np.invert(df[columns].isna()).sum(axis=1)
+  assert all(rowResults <= 1), "each row should only be in one modeling group"
+  # Coalesce results
+  def _get_value(row):
+    idx = row.first_valid_index()
+    return row[idx] if idx is not None else np.nan
+
+  coalesced = df[columns].apply(_get_value, axis=1)
+  # Drop old columns and replace with new
+  df = df.drop(columns=columns)
+  df[columnPrefix] = coalesced
+  return df
+
+
+def coalesce_group_models(
+  scoredNotes: pd.DataFrame, helpfulnessScores: pd.DataFrame
+) -> Tuple[pd.DataFrame, pd.DataFrame]:
+  """Coalesce all group modeling columns across note and user scoring.
+
+  Since each Scorer must have distinct output columns, we use coalescing to run
+  multiple instances of MFGroupScorer objects and then condense the results into
+  a single set of columns.  This approach works because each note will be scored
+  by at most one MFGroupScorer instance.
+
+  Args:
+    scoredNotes: scoring output for notes.
+    helpfulnessScores: scoring output for users.
+
+  Returns:
+    tuple containing coalesced scoring results for notes and users.
+  """
+  for col in [
+    c.groupNoteInterceptKey,
+    c.groupNoteFactor1Key,
+    c.groupRatingStatusKey,
+    c.groupNoteInterceptMaxKey,
+    c.groupNoteInterceptMinKey,
+    c.modelingGroupKey,
+  ]:
+    scoredNotes = _coalesce_columns(scoredNotes, col)
+
+  for col in [c.groupRaterInterceptKey, c.groupRaterFactor1Key, c.modelingGroupKey]:
+    helpfulnessScores = _coalesce_columns(helpfulnessScores, col)
+
+  return scoredNotes, helpfulnessScores
+
+
+class MFGroupScorer(MFBaseScorer):
+  def __init__(
+    self,
+    groupNumber: int,
+    seed: Optional[int] = None,
+    pseudoraters: Optional[bool] = False,
+    groupThreshold: float = 0.8,
+  ) -> None:
+    """Configure MFGroupScorer object.
+
+    Notice that each MFGroupScorer defines column names by appending the groupNumber to
+    column prefixes which are constant.  Dynamically defining the column names allows the
+    group scorer to be instantiated multiple times while maintaining the property that
+    the columns attached by each scorer remain unique.  Once all scorers have ran, we
+    (will) validate that each note was scored by at most one group scorer and then coalesce
+    all of the group scoring columns and remove the groupNumber suffix.
+
+    Args:
+      groupNumber: int indicating which group this scorer instance should filter for.
+      seed: if not None, seed value to ensure deterministic execution
+      pseudoraters: if True, compute optional pseudorater confidence intervals
+      groupThreshold: float indicating what fraction of ratings must be from within a group
+        for the model to be active
+    """
+    super().__init__(seed, pseudoraters)
+    assert groupNumber > 0, "groupNumber must be positive.  0 is reserved for unassigned."
+    assert groupNumber <= groupScorerCount, "groupNumber exceeds maximum expected groups."
+    self._groupNumber = groupNumber
+    self._groupThreshold = groupThreshold
+    self._groupNoteInterceptKey = f"{c.groupNoteInterceptKey}_{self._groupNumber}"
+    self._groupNoteFactor1Key = f"{c.groupNoteFactor1Key}_{self._groupNumber}"
+    self._groupRatingStatusKey = f"{c.groupRatingStatusKey}_{self._groupNumber}"
+    self._groupNoteInterceptMaxKey = f"{c.groupNoteInterceptMaxKey}_{self._groupNumber}"
+    self._groupNoteInterceptMinKey = f"{c.groupNoteInterceptMinKey}_{self._groupNumber}"
+    self._groupRaterInterceptKey = f"{c.groupRaterInterceptKey}_{self._groupNumber}"
+    self._groupRaterFactor1Key = f"{c.groupRaterFactor1Key}_{self._groupNumber}"
+    self._modelingGroupKey = f"{c.modelingGroupKey}_{self._groupNumber}"
+
+  def _get_note_col_mapping(self) -> Dict[str, str]:
+    """Returns a dict mapping default note column names to custom names for a specific model."""
+    return {
+      c.internalNoteInterceptKey: self._groupNoteInterceptKey,
+      c.internalNoteFactor1Key: self._groupNoteFactor1Key,
+      c.internalRatingStatusKey: self._groupRatingStatusKey,
+      c.noteInterceptMinKey: self._groupNoteInterceptMinKey,
+      c.noteInterceptMaxKey: self._groupNoteInterceptMaxKey,
+    }
+
+  def _get_user_col_mapping(self) -> Dict[str, str]:
+    """Returns a dict mapping default user column names to custom names for a specific model."""
+    return {
+      c.internalRaterInterceptKey: self._groupRaterInterceptKey,
+      c.internalRaterFactor1Key: self._groupRaterFactor1Key,
+    }
+
+  def get_scored_notes_cols(self) -> List[str]:
+    """Returns a list of columns which should be present in the scoredNotes output."""
+    return [
+      c.noteIdKey,
+      self._groupNoteInterceptKey,
+      self._groupNoteFactor1Key,
+      self._groupRatingStatusKey,
+      self._groupNoteInterceptMaxKey,
+      self._groupNoteInterceptMinKey,
+      self._modelingGroupKey,
+    ]
+
+  def get_helpfulness_scores_cols(self) -> List[str]:
+    """Returns a list of columns which should be present in the helpfulnessScores output."""
+    return [
+      c.raterParticipantIdKey,
+      self._groupRaterInterceptKey,
+      self._groupRaterFactor1Key,
+      self._modelingGroupKey,
+    ]
+
+  def get_auxiliary_note_info_cols(self) -> List[str]:
+    """Returns a list of columns which should be present in the auxiliaryNoteInfo output."""
+    return []
+
+  def _get_dropped_note_cols(self) -> List[str]:
+    """Returns a list of columns which should be excluded from scoredNotes and auxiliaryNoteInfo."""
+    return super()._get_dropped_note_cols() + (
+      [
+        c.internalActiveRulesKey,
+        c.activeFilterTagsKey,
+        c.ratingWeightKey,
+      ]
+      + c.notHelpfulTagsAdjustedColumns
+      + c.notHelpfulTagsAdjustedRatioColumns
+      + c.incorrectFilterColumns
+      + c.noteParameterUncertaintyTSVAuxColumns
+    )
+
+  def _get_dropped_user_cols(self) -> List[str]:
+    """Returns a list of columns which should be excluded from helpfulnessScores output."""
+    return super()._get_dropped_user_cols() + [
+      c.crhCrnhRatioDifferenceKey,
+      c.meanNoteScoreKey,
+      c.raterAgreeRatioKey,
+      c.aboveHelpfulnessThresholdKey,
+    ]
+
+  def _filter_input(
+    self, ratings: pd.DataFrame, noteStatusHistory: pd.DataFrame, userEnrollment: pd.DataFrame
+  ) -> Tuple[pd.DataFrame, pd.DataFrame]:
+    """Prune the contents of ratings to only include ratings from users in the modeling group.
+
+    This function identifies the subset of ratings to include in group model scoring.
+    To improve modeling within the group, we only include ratings from users in the modeling
+    group.  However, we place no restriction on which notes to include in the model and instead
+    include ratings on any note.  Including ratings on any note increases the amount of data
+    available during training about each user, in effect also increasing the number of users
+    and notes we are able to include in the model.
+
+    Including notes by users outside of the modeling group means that the model will issue
+    scores for notes which do not meet group modeling criteria (i.e. >80% of ratings are
+    from users in the modeling group, and the author is also from the modeling group). We
+    enforce these criteria *after* scoring in _postprocess_output so that the maximum amount
+    of ratings are available during scoring.
+
+    Args:
+      ratings (pd.DataFrame): preprocessed ratings
+      noteStatusHistory (pd.DataFrame): one row per note; history of when note had each status
+      userEnrollment (pd.DataFrame): one row per user specifying enrollment properties
+
+    Returns:
+      Tuple[pd.DataFrame, pd.DataFrame]:
+        ratings: ratings filtered to only contain rows of interest
+        noteStatusHistory: noteStatusHistory filtered to only contain rows of interest
+    """
+    userEnrollment = userEnrollment.rename(columns={c.participantIdKey: c.raterParticipantIdKey})
+    userEnrollment = userEnrollment[userEnrollment[c.modelingGroupKey] == self._groupNumber]
+    ratings = ratings.merge(userEnrollment[[c.raterParticipantIdKey]].drop_duplicates())
+    return ratings, noteStatusHistory
+
+  def _postprocess_output(
+    self,
+    noteScores: pd.DataFrame,
+    userScores: pd.DataFrame,
+    ratings: pd.DataFrame,
+    noteStatusHistory: pd.DataFrame,
+    userEnrollment: pd.DataFrame,
+  ) -> Tuple[pd.DataFrame, pd.DataFrame]:
+    """Filter noteScores and userScores and add column containing modeling group.
+
+    Enforce the requirements that:
+      - Notes scored by this group model have >=80% of ratings from the modeling group.
+      - Notes scored by this group model were authored by a member of the modeling group.
+      - Users assigned a factor / intercept are in the modeling group.
+
+    Args:
+      noteScores: note outputs from scoring
+      userScores: user outputs from scoring
+      ratings (pd.DataFrame): preprocessed ratings
+      noteStatusHistory (pd.DataFrame): one row per note; history of when note had each status
+      userEnrollment (pd.DataFrame): one row per user specifying enrollment properties
+
+    Returns:
+      Tuple[pd.DataFrame, pd.DataFrame]:
+        noteScores: filtered and updated note scoring output
+        userScores: filtered and updated user scoring output
+    """
+    # Prune notes according to authorship filter.
+    noteScores = noteScores.merge(
+      userEnrollment[[c.participantIdKey, c.modelingGroupKey]].rename(
+        columns={c.participantIdKey: c.noteAuthorParticipantIdKey}
+      ),
+      how="left",
+    )
+    noteScores = noteScores[noteScores[c.modelingGroupKey] == self._groupNumber]
+    noteScores = noteScores.drop(columns=c.modelingGroupKey)
+    # Identify notes with enough ratings from within the modeling group.
+    ratings = ratings.merge(
+      userEnrollment[[c.participantIdKey, c.modelingGroupKey]].rename(
+        columns={c.participantIdKey: c.raterParticipantIdKey}
+      ),
+      how="left",
+    )
+    ratings["inGroup"] = ratings[c.modelingGroupKey] == self._groupNumber
+    ratios = ratings[[c.noteIdKey, "inGroup"]].groupby(c.noteIdKey).mean().reset_index()
+    notesAboveThreshold = ratios[ratios["inGroup"] >= self._groupThreshold][[c.noteIdKey]]
+    noteScores = noteScores.merge(notesAboveThreshold)
+    # Note that even though ratings were restricted to the modeling group, users outside of
+    # the modeling group may still have authored a note which was rated and may consequently
+    # appear in the userScores.  Accordingly, we drop any user which was outside of the
+    # modeling group from the user scores.
+    userScores = userScores.merge(
+      userEnrollment[[c.participantIdKey, c.modelingGroupKey]].rename(
+        columns={c.participantIdKey: c.raterParticipantIdKey}
+      ),
+      how="left",
+    )
+    userScores = userScores[userScores[c.modelingGroupKey] == self._groupNumber]
+    userScores = userScores.drop(columns=c.modelingGroupKey)
+    # Set the modelingGroupKey column in each output
+    noteScores[self._modelingGroupKey] = self._groupNumber
+    userScores[self._modelingGroupKey] = self._groupNumber
+    return noteScores, userScores
diff --git a/sourcecode/scoring/note_ratings.py b/sourcecode/scoring/note_ratings.py
index dad9dcfc..df12a23f 100644
--- a/sourcecode/scoring/note_ratings.py
+++ b/sourcecode/scoring/note_ratings.py
@@ -21,10 +21,18 @@ def is_crh(scoredNotes, minRatingsNeeded, crhThreshold) -> pd.Series:
   )
 
 
-def is_crh_lcb(scoredNotes, minRatingsNeeded, crhThresholdLCBIntercept) -> pd.Series:
+def is_crh_lcb(scoredNotes, minRatingsNeeded, crhThresholdLCBIntercept, inertia=False) -> pd.Series:
   enoughRatings = scoredNotes[c.numRatingsKey] >= minRatingsNeeded
+
   if c.noteInterceptMinKey in scoredNotes.columns:
-    return enoughRatings & (scoredNotes[c.noteInterceptMinKey] >= crhThresholdLCBIntercept)
+    if inertia == False:
+      return enoughRatings & (scoredNotes[c.noteInterceptMinKey] >= crhThresholdLCBIntercept)
+    else:
+      return (
+        enoughRatings
+        & (scoredNotes[c.noteInterceptMinKey] >= crhThresholdLCBIntercept)
+        & (scoredNotes[c.currentLabelKey] == c.currentlyRatedHelpful)
+      )
   else:
     # all False
     return enoughRatings & (~enoughRatings)
@@ -489,6 +497,15 @@ def compute_scored_notes(
           crhThreshold,
           minRatingsNeeded,
         ),
+        scoring_rules.RuleFromFunction(
+          RuleID.LCB_INERTIA,
+          {RuleID.GENERAL_CRH},
+          c.currentlyRatedHelpful,
+          lambda noteStats: is_crh_lcb_function(
+            noteStats, minRatingsNeeded, crhThresholdLCBIntercept - inertiaDelta, True
+          ),
+          onlyApplyToNotesThatSayTweetIsMisleading=True,
+        ),
         scoring_rules.FilterTagOutliers(
           RuleID.TAG_OUTLIER,
           {RuleID.GENERAL_CRH},
diff --git a/sourcecode/scoring/process_data.py b/sourcecode/scoring/process_data.py
index bc74a22a..d62cd9c6 100644
--- a/sourcecode/scoring/process_data.py
+++ b/sourcecode/scoring/process_data.py
@@ -87,7 +87,7 @@ def tsv_parser(
     firstLine = rawTSV.split("\n")[0]
     if len(firstLine.split("\t")) != len(columns):
       raise ValueError
-    return pd.read_csv(
+    data = pd.read_csv(
       StringIO(rawTSV),
       sep="\t",
       names=columns,
@@ -95,13 +95,39 @@ def tsv_parser(
       header=0 if header else None,
       index_col=[],
     )
+    return data
   except (ValueError, IndexError):
     raise ValueError("invalid input")
 
 
-def tsv_reader(path: str, mapping, columns, header=False):
+# TODO: remove this function once modelingGroup column is fully launched
+def user_enrollment_parser(rawTSV: str, header: bool) -> pd.DataFrame:
+  """Parse user enrollment TSV and optinoally tolerate the modelingGroup column.
+
+  Args:
+    rawTSV: str contianing entire TSV input
+    header: bool indicating whether the input will have a header
+
+  Returns:
+    pd.DataFrame containing parsed data
+  """
+  try:
+    df = tsv_parser(rawTSV, c.userEnrollmentTSVTypeMapping, c.userEnrollmentTSVColumns, header)
+    df[c.modelingGroupKey] = 0
+  except ValueError:
+    df = tsv_parser(
+      rawTSV, c.userEnrollmentExpandedTSVTypeMapping, c.userEnrollmentExpandedTSVColumns, header
+    )
+  return df
+
+
+# TODO: remove support for specifying a custom parser once modelingGroup is fully rolled out
+def tsv_reader(path: str, mapping, columns, header=False, parser=tsv_parser):
   with open(path, "r") as handle:
-    return tsv_parser(handle.read(), mapping, columns, header)
+    if parser == tsv_parser:
+      return parser(handle.read(), mapping, columns, header)
+    else:
+      return parser(handle.read(), header)
 
 
 def read_from_tsv(
@@ -162,13 +188,13 @@ def read_from_tsv(
     userEnrollment = None
   else:
     userEnrollment = tsv_reader(
-      userEnrollmentPath, c.userEnrollmentTSVTypeMapping, c.userEnrollmentTSVColumns, header=headers
+      userEnrollmentPath, None, None, header=headers, parser=user_enrollment_parser
     )
-    assert len(userEnrollment.columns.values) == len(c.userEnrollmentTSVColumns) and all(
-      userEnrollment.columns == c.userEnrollmentTSVColumns
+    assert len(userEnrollment.columns.values) <= len(c.userEnrollmentExpandedTSVColumns) and (
+      len(set(userEnrollment.columns) - set(c.userEnrollmentExpandedTSVColumns)) == 0
     ), (
-      f"userEnrollment columns don't match: \n{[col for col in userEnrollment.columns if not col in c.userEnrollmentTSVColumns]} are extra columns, "
-      + f"\n{[col for col in c.userEnrollmentTSVColumns if not col in userEnrollment.columns]} are missing."
+      f"userEnrollment columns don't match: \n{[col for col in userEnrollment.columns if not col in c.userEnrollmentExpandedTSVColumns]} are extra columns, "
+      + f"\n{[col for col in c.userEnrollmentExpandedTSVColumns if not col in userEnrollment.columns]} are missing."
     )
 
   return notes, ratings, noteStatusHistory, userEnrollment
diff --git a/sourcecode/scoring/run_scoring.py b/sourcecode/scoring/run_scoring.py
index b6e02faa..86a5d7f4 100644
--- a/sourcecode/scoring/run_scoring.py
+++ b/sourcecode/scoring/run_scoring.py
@@ -7,15 +7,17 @@
 
 from collections import namedtuple
 import concurrent.futures
+from itertools import chain
 import multiprocessing
 import time
-from typing import List, Optional, Tuple
+from typing import Dict, List, Optional, Set, Tuple
 
 from . import constants as c, contributor_state, note_ratings, note_status_history, scoring_rules
+from .enums import Scorers
 from .mf_base_scorer import MFBaseScorer
 from .mf_core_scorer import MFCoreScorer
-from .mf_coverage_scorer import MFCoverageScorer
 from .mf_expansion_scorer import MFExpansionScorer
+from .mf_group_scorer import coalesce_group_models, groupScorerCount, MFGroupScorer
 from .scorer import Scorer
 from .scoring_rules import RuleID
 
@@ -26,17 +28,88 @@
 ModelResult = namedtuple("ModelResult", ["scoredNotes", "helpfulnessScores", "auxiliaryNoteInfo"])
 
 
-def _get_scorers(seed: Optional[int], pseudoraters: Optional[bool]) -> List[Scorer]:
+def _check_flips(
+  scoredNotes: pd.DataFrame,
+  noteStatusHistory: pd.DataFrame,
+  userEnrollment: pd.DataFrame,
+  modelingPopulations: List[str],
+  pctFlips: float,
+  resultCol: str,
+) -> bool:
+  """
+  Args:
+    scoredNotes (pd.DataFrame): notes with scores returned by MF scoring algorithm to evaluate against NSH for flips
+    noteStatusHistory (pd.DataFrame): one row per note; history of when note had each status
+    userEnrollment (pd.DataFrame): The enrollment state for each contributor
+    modelingPopulation (str): name of modelingPopulation(s) of authors for which to evaluate CRH flips
+    pctFlips (float): percent of flips of unlocked notes that triggers a rerun
+    resultCol (str): name of column with noteStatus for model
+
+  Returns:
+    bool whether unlocked note flips exceed acceptable level, triggering a rerun
+  """
+  # combine noteStatusHistory with userEnrollment modelingPopulation
+  popNsh = noteStatusHistory.merge(
+    userEnrollment[[c.participantIdKey, c.modelingPopulationKey]],
+    left_on=c.noteAuthorParticipantIdKey,
+    right_on=c.participantIdKey,
+    how="inner",
+  )
+  unlockedPopNsh = popNsh.loc[
+    (popNsh[c.modelingPopulationKey].isin(modelingPopulations))
+    & (popNsh[c.timestampMillisOfStatusLockKey].isna())
+  ]
+
+  # check how many unlocked notes that appeared in previous run have now flipped status
+  unlockedPopNsh = unlockedPopNsh.merge(
+    scoredNotes[[c.noteIdKey, resultCol]], on=c.noteIdKey, how="inner"
+  )
+
+  statusFlips = len(
+    unlockedPopNsh.loc[
+      (unlockedPopNsh[c.currentLabelKey] != unlockedPopNsh[resultCol])
+      & (
+        (unlockedPopNsh[c.currentLabelKey] == c.currentlyRatedHelpful)
+        | (unlockedPopNsh[resultCol] == c.currentlyRatedHelpful)
+      )
+    ]
+  )
+  # take max w/1 to avoid divide by zero error when no prevCRH
+  prevCrh = max(
+    1, len(unlockedPopNsh.loc[unlockedPopNsh[c.currentLabelKey] == c.currentlyRatedHelpful])
+  )
+  print(f"Percentage of previous CRH flipping status: {(statusFlips / prevCrh)}")
+
+  return (statusFlips / prevCrh) > pctFlips
+
+
+def _get_scorers(
+  seed: Optional[int], pseudoraters: Optional[bool], enabledScorers: Optional[Set[Scorers]]
+) -> Dict[Scorers, List[Scorer]]:
   """Instantiate all Scorer objects which should be used for note ranking.
 
   Args:
     seed (int, optional): if not None, base distinct seeds for the first and second MF rounds on this value
     pseudoraters (bool, optional): if True, compute optional pseudorater confidence intervals
+    enabledScorers: if not None, set of which scorers should be instantiated and enabled
 
   Returns:
-    List[Scorer] containing instantiated Scorer objects for note ranking.
+    Dict[Scorers, List[Scorer]] containing instantiated Scorer objects for note ranking.
   """
-  return [MFCoreScorer(seed, pseudoraters), MFExpansionScorer(seed), MFCoverageScorer(seed)]
+  scorers: Dict[Scorers, List[Scorer]] = dict()
+
+  if enabledScorers is None or Scorers.MFCoreScorer in enabledScorers:
+    scorers[Scorers.MFCoreScorer] = [MFCoreScorer(seed, pseudoraters)]
+  if enabledScorers is None or Scorers.MFExpansionScorer in enabledScorers:
+    scorers[Scorers.MFExpansionScorer] = [MFExpansionScorer(seed)]
+  if enabledScorers is None or Scorers.MFGroupScorer in enabledScorers:
+    # Note that index 0 is reserved, corresponding to no group assigned, so scoring group
+    # numbers begin with index 1.
+    scorers[Scorers.MFGroupScorer] = [
+      MFGroupScorer(groupNumber=i) for i in range(1, groupScorerCount + 1)
+    ]
+
+  return scorers
 
 
 def _merge_results(
@@ -94,13 +167,42 @@ def _run_scorer_parallelizable(scorer, ratings, noteStatusHistory, userEnrollmen
   runCounter = 0
   scorerStartTime = time.perf_counter()
   result = None
-
-  if isinstance(scorer, (MFBaseScorer, MFCoreScorer, MFCoverageScorer, MFExpansionScorer)):
-    while (len(scorer._mfRanker.train_errors) == 0) or (
-      scorer._mfRanker.train_errors[-1] > c.maxTrainError
+  flips = False
+
+  # TODO: encapsulate this at a lower level so we're not accessing private state and can deal
+  # with the case where there are no ratings for a scorer to run on after filtering.
+  if isinstance(scorer, (MFBaseScorer, MFCoreScorer, MFExpansionScorer)):
+    while (
+      (len(scorer._mfRanker.train_errors) == 0)
+      or (scorer._mfRanker.train_errors[-1] > c.maxTrainError)
+      or (flips and runCounter < c.maxReruns)
     ):
       runCounter += 1
       result = ModelResult(*scorer.score(ratings, noteStatusHistory, userEnrollment))
+      if runCounter > c.maxReruns:
+        break
+      # check for notes createdat > lock time, not more than x% have changed status from nsh
+      if isinstance(scorer, MFExpansionScorer):
+        flips = _check_flips(
+          result.scoredNotes,
+          noteStatusHistory,
+          userEnrollment,
+          [c.expansion],
+          c.expansionFlipPct,
+          c.expansionRatingStatusKey,
+        )
+      elif isinstance(scorer, MFCoreScorer):
+        flips = _check_flips(
+          result.scoredNotes,
+          noteStatusHistory,
+          userEnrollment,
+          [c.core],
+          c.coreFlipPct,
+          c.coreRatingStatusKey,
+        )
+      if flips:
+        if scorer._seed is not None:
+          scorer._seed = scorer._seed + 1
   else:
     result = ModelResult(*scorer.score(ratings, noteStatusHistory, userEnrollment))
     runCounter += 1
@@ -178,12 +280,17 @@ def _run_scorers(
       modelResult.helpfulnessScores,
       modelResult.auxiliaryNoteInfo,
     )
+  scoredNotes, helpfulnessScores = coalesce_group_models(scoredNotes, helpfulnessScores)
 
   return scoredNotes, helpfulnessScores, auxiliaryNoteInfo
 
 
 def meta_score(
-  scoredNotes: pd.DataFrame, auxiliaryNoteInfo: pd.DataFrame, lockedStatus: pd.DataFrame
+  scorers: Dict[Scorers, List[Scorer]],
+  scoredNotes: pd.DataFrame,
+  auxiliaryNoteInfo: pd.DataFrame,
+  lockedStatus: pd.DataFrame,
+  enabledScorers: Optional[Set[Scorers]],
 ) -> Tuple[pd.DataFrame, pd.DataFrame]:
   """Determine final note status based on individual scoring results.
 
@@ -196,6 +303,7 @@ def meta_score(
     scoredNotes: pd.DataFrame containing all scored note results.
     auxiliaryNoteInfo: pd.DataFrame containing tag aggregates
     lockedStatus: pd.DataFrame containing {noteId, status} pairs for all notes
+    enabledScorers: if not None, set of which scorers should be instantiated and enabled
 
   Returns:
     Tuple[pd.DataFrame, pd.DataFrame]:
@@ -209,32 +317,62 @@ def meta_score(
     on=c.noteIdKey,
   )
   assert len(scoredNotes) == len(auxiliaryNoteInfo)
-  rules = [
-    scoring_rules.DefaultRule(RuleID.META_INITIAL_NMR, set(), c.needsMoreRatings),
-    scoring_rules.ApplyModelResult(
-      RuleID.EXPANSION_MODEL, {RuleID.META_INITIAL_NMR}, c.expansionRatingStatusKey
-    ),
-    scoring_rules.ApplyModelResult(
-      RuleID.CORE_MODEL, {RuleID.EXPANSION_MODEL}, c.coreRatingStatusKey
-    ),
-    scoring_rules.ApplyAdditiveModelResult(
-      RuleID.COVERAGE_MODEL, {RuleID.CORE_MODEL}, c.coverageRatingStatusKey, 0.38
-    ),
-    scoring_rules.ScoringDriftGuard(RuleID.SCORING_DRIFT_GUARD, {RuleID.CORE_MODEL}, lockedStatus),
-    # TODO: The rule below both sets tags for notes which are CRH / CRNH and unsets status for
-    # any notes which are CRH / CRNH but don't have enough ratings to assign two tags.  The later
-    # behavior can lead to unsetting locked status.  We should refactor this code to (1) remove
-    # the behavior which unsets status (instead tags will be assigned on a best effort basis) and
-    # (2) set tags in logic which is not run as a ScoringRule (since ScoringRules function to
-    # update note status).
-    scoring_rules.InsufficientExplanation(
-      RuleID.INSUFFICIENT_EXPLANATION,
-      {RuleID.CORE_MODEL},
-      c.needsMoreRatings,
-      c.minRatingsToGetTag,
-      c.minTagsNeededForStatus,
-    ),
+  rules: List[scoring_rules.ScoringRule] = [
+    scoring_rules.DefaultRule(RuleID.META_INITIAL_NMR, set(), c.needsMoreRatings)
   ]
+  # Only attach meta-scoring rules for models which actually run.
+  if enabledScorers is None or Scorers.MFExpansionScorer in enabledScorers:
+    rules.append(
+      scoring_rules.ApplyModelResult(
+        RuleID.EXPANSION_MODEL, {RuleID.META_INITIAL_NMR}, c.expansionRatingStatusKey
+      )
+    )
+  if enabledScorers is None or Scorers.MFCoreScorer in enabledScorers:
+    rules.append(
+      scoring_rules.ApplyModelResult(
+        RuleID.CORE_MODEL, {RuleID.META_INITIAL_NMR}, c.coreRatingStatusKey
+      )
+    )
+  if enabledScorers is None or Scorers.MFGroupScorer in enabledScorers:
+    # TODO: modify this code to work when MFExpansionScorer is disabled by the system test
+    assert len(scorers[Scorers.MFCoreScorer]) == 1
+    assert len(scorers[Scorers.MFExpansionScorer]) == 1
+    coreScorer = scorers[Scorers.MFCoreScorer][0]
+    assert isinstance(coreScorer, MFCoreScorer)
+    expnasionScorer = scorers[Scorers.MFExpansionScorer][0]
+    assert isinstance(expnasionScorer, MFExpansionScorer)
+    coreCrhThreshold = coreScorer.get_crh_threshold()
+    expansionCrhThreshold = expnasionScorer.get_crh_threshold()
+    for i in range(1, groupScorerCount + 1):
+      rules.append(
+        scoring_rules.ApplyGroupModelResult(
+          RuleID[f"GROUP_MODEL_{i}"],
+          {RuleID.EXPANSION_MODEL, RuleID.CORE_MODEL},
+          i,
+          coreCrhThreshold,
+          expansionCrhThreshold,
+        )
+      )
+  rules.extend(
+    [
+      scoring_rules.ScoringDriftGuard(
+        RuleID.SCORING_DRIFT_GUARD, {RuleID.CORE_MODEL}, lockedStatus
+      ),
+      # TODO: The rule below both sets tags for notes which are CRH / CRNH and unsets status for
+      # any notes which are CRH / CRNH but don't have enough ratings to assign two tags.  The later
+      # behavior can lead to unsetting locked status.  We should refactor this code to (1) remove
+      # the behavior which unsets status (instead tags will be assigned on a best effort basis) and
+      # (2) set tags in logic which is not run as a ScoringRule (since ScoringRules function to
+      # update note status).
+      scoring_rules.InsufficientExplanation(
+        RuleID.INSUFFICIENT_EXPLANATION,
+        {RuleID.CORE_MODEL},
+        c.needsMoreRatings,
+        c.minRatingsToGetTag,
+        c.minTagsNeededForStatus,
+      ),
+    ]
+  )
   scoredNotes[c.firstTagKey] = np.nan
   scoredNotes[c.secondTagKey] = np.nan
   scoringResult = scoring_rules.apply_scoring_rules(
@@ -405,6 +543,26 @@ def _compute_helpfulness_scores(
   return helpfulnessScores
 
 
+def _add_deprecated_columns(scoredNotes: pd.DataFrame) -> pd.DataFrame:
+  """Impute columns which are no longer used but must be maintained in output.
+
+  Args:
+    scoredNotes: DataFrame containing note scoring output
+
+  Returns:
+    scoredNotes augmented to include deprecated columns filled with dummy values
+  """
+  for column, columnType in c.deprecatedNoteModelOutputTSVColumnsAndTypes:
+    assert column not in scoredNotes.columns
+    if columnType == np.double:
+      scoredNotes[column] = np.nan
+    elif columnType == np.str:
+      scoredNotes[column] = ""
+    else:
+      assert False, f"column type {columnType} unsupported"
+  return scoredNotes
+
+
 def _validate(
   scoredNotes: pd.DataFrame,
   helpfulnessScores: pd.DataFrame,
@@ -447,6 +605,8 @@ def run_scoring(
   userEnrollment: pd.DataFrame,
   seed: Optional[int] = None,
   pseudoraters: Optional[bool] = True,
+  enabledScorers: Optional[Set[Scorers]] = None,
+  strictColumns: bool = True,
 ):
   """Invokes note scoring algorithms, merges results and computes user stats.
 
@@ -456,6 +616,8 @@ def run_scoring(
     userEnrollment (pd.DataFrame): The enrollment state for each contributor
     seed (int, optional): if not None, base distinct seeds for the first and second MF rounds on this value
     pseudoraters (bool, optional): if True, compute optional pseudorater confidence intervals
+    enabledScorers (Set[Scorers], optional): Scorers which should be instantiated
+    strictColumns (bool, optional): if True, validate which columns are present
 
   Returns:
     Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
@@ -465,9 +627,9 @@ def run_scoring(
       auxiliaryNoteInfo: one row per note containing adjusted and ratio tag values
   """
   # Apply individual scoring models and obtained merged result.
-  scorers = _get_scorers(seed, pseudoraters)
+  scorers = _get_scorers(seed, pseudoraters, enabledScorers)
   scoredNotes, helpfulnessScores, auxiliaryNoteInfo = _run_scorers(
-    scorers, ratings, noteStatusHistory, userEnrollment
+    list(chain(*scorers.values())), ratings, noteStatusHistory, userEnrollment
   )
 
   # Augment scoredNotes and auxiliaryNoteInfo with additional attributes for each note
@@ -479,7 +641,11 @@ def run_scoring(
 
   # Assign final status to notes based on individual model scores and note attributes.
   scoredNotesCols, auxiliaryNoteInfoCols = meta_score(
-    scoredNotes, auxiliaryNoteInfo, noteStatusHistory[[c.noteIdKey, c.lockedStatusKey]]
+    scorers,
+    scoredNotes,
+    auxiliaryNoteInfo,
+    noteStatusHistory[[c.noteIdKey, c.lockedStatusKey]],
+    enabledScorers,
   )
   scoredNotes = scoredNotes.merge(scoredNotesCols, on=c.noteIdKey)
   auxiliaryNoteInfo = auxiliaryNoteInfo.merge(auxiliaryNoteInfoCols, on=c.noteIdKey)
@@ -505,5 +671,10 @@ def run_scoring(
     noteStatusHistory
   ), "noteStatusHistory should contain all notes after preprocessing"
 
-  # Finalize output dataframes with correct columns.
-  return _validate(scoredNotes, helpfulnessScores, newNoteStatusHistory, auxiliaryNoteInfo)
+  # Skip validation and selection out output columns if the set of scorers is overridden.
+  scoredNotes = _add_deprecated_columns(scoredNotes)
+  if strictColumns:
+    scoredNotes, helpfulnessScores, newNoteStatusHistory, auxiliaryNoteInfo = _validate(
+      scoredNotes, helpfulnessScores, newNoteStatusHistory, auxiliaryNoteInfo
+    )
+  return scoredNotes, helpfulnessScores, newNoteStatusHistory, auxiliaryNoteInfo
diff --git a/sourcecode/scoring/scorer.py b/sourcecode/scoring/scorer.py
index 20d0ff8e..b01c42ba 100644
--- a/sourcecode/scoring/scorer.py
+++ b/sourcecode/scoring/scorer.py
@@ -58,6 +58,35 @@ def _filter_input(
     """
     return ratings, noteStatusHistory
 
+  def _postprocess_output(
+    self,
+    noteScores: pd.DataFrame,
+    userScores: pd.DataFrame,
+    ratings: pd.DataFrame,
+    noteStatusHistory: pd.DataFrame,
+    userEnrollment: pd.DataFrame,
+  ) -> Tuple[pd.DataFrame, pd.DataFrame]:
+    """Prune noteScores and userScores and augment with any additional columns as necessary.
+
+    Note that ratings, noteStatusHistory and userEnrollment are expected to be the *raw*
+    versions which were supplied to "score", not the version output after filtering.
+    Operating on the raw versions allows accurately computing statistics over the entire dataset
+    (e.g. fraction of users from a modeling group).
+
+    Args:
+      noteScores (pd.DataFrame): scoring output for notes
+      userScores (pd.DataFrame): scoirng output for users
+      ratings (pd.DataFrame): preprocessed ratings
+      noteStatusHistory (pd.DataFrame): one row per note; history of when note had each status
+      userEnrollment (pd.DataFrame): one row per user specifying enrollment properties
+
+    Returns:
+      Tuple[pd.DataFrame, pd.DataFrame]:
+        noteScores: note scoring output from _score_notes_and_users
+        userScores: user scoring output from _score_notes_and_users
+    """
+    return noteScores, userScores
+
   def _get_note_col_mapping(self) -> Dict[str, str]:
     """Returns a dict mapping default note column names to custom names for a specific model."""
     return {}
@@ -83,14 +112,17 @@ def _score_notes_and_users(
     """
 
   def score(
-    self, ratings: pd.DataFrame, noteStatusHistory: pd.DataFrame, userEnrollment: pd.DataFrame
+    self,
+    ratingsRaw: pd.DataFrame,
+    noteStatusHistoryRaw: pd.DataFrame,
+    userEnrollmentRaw: pd.DataFrame,
   ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame], Optional[pd.DataFrame]]:
     """Process ratings to assign status to notes and optionally compute rater properties.
 
     Args:
-      ratings (pd.DataFrame): preprocessed ratings
-      noteStatusHistory (pd.DataFrame): one row per note; history of when note had each status
-      userEnrollment (pd.DataFrame): one row per user specifying enrollment properties
+      ratingsRaw (pd.DataFrame): preprocessed ratings
+      noteStatusHistoryRaw (pd.DataFrame): one row per note; history of when note had each status
+      userEnrollmentRaw (pd.DataFrame): one row per user specifying enrollment properties
 
     Returns:
       Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
@@ -99,10 +131,27 @@ def score(
         auxiliaryNoteInfo: one row per note containing adjusted and ratio tag values
     """
     # Transform input, run core scoring algorithm, transform output.
-    ratings, noteStatusHistory = self._filter_input(ratings, noteStatusHistory, userEnrollment)
+    ratings, noteStatusHistory = self._filter_input(
+      ratingsRaw, noteStatusHistoryRaw, userEnrollmentRaw
+    )
+    # If there are no ratings left after filtering, then return empty dataframes.
+    if len(ratings) == 0:
+      return (
+        pd.DataFrame(columns=self.get_scored_notes_cols()),
+        pd.DataFrame(columns=self.get_helpfulness_scores_cols())
+        if self.get_helpfulness_scores_cols()
+        else None,
+        pd.DataFrame(columns=self.get_auxiliary_note_info_cols())
+        if self.get_auxiliary_note_info_cols()
+        else None,
+      )
     noteScores, userScores = self._score_notes_and_users(ratings, noteStatusHistory)
+    noteScores, userScores = self._postprocess_output(
+      noteScores, userScores, ratingsRaw, noteStatusHistoryRaw, userEnrollmentRaw
+    )
     noteScores = noteScores.rename(columns=self._get_note_col_mapping())
     userScores = userScores.rename(columns=self._get_user_col_mapping())
+    # TODO: Tolerate unexpcted columns if --nostrict-columns is set.
     # Process noteScores
     noteScores = noteScores.drop(columns=self._get_dropped_note_cols())
     assert set(noteScores.columns) == set(
@@ -112,7 +161,12 @@ def score(
     Missing expected columns that should've been in noteScores: {set(self.get_scored_notes_cols() + self.get_auxiliary_note_info_cols()) - set(noteScores.columns)}"""
     # Process userScores
     userScores = userScores.drop(columns=self._get_dropped_user_cols())
-    assert set(userScores.columns) == set(self.get_helpfulness_scores_cols())
+    assert set(userScores.columns) == set(
+      self.get_helpfulness_scores_cols()
+    ), f"""all columns must be either dropped or explicitly defined in an output. 
+    Extra columns that were in userScores: {set(userScores.columns) - set(self.get_helpfulness_scores_cols())}
+    Missing expected columns that should've been in userScores: {set(self.get_helpfulness_scores_cols()) - set(userScores.columns)}"""
+
     # Return dataframes with specified columns in specified order
     return (
       noteScores[self.get_scored_notes_cols()],
diff --git a/sourcecode/scoring/scoring_rules.py b/sourcecode/scoring/scoring_rules.py
index 583b3b1a..ea9bda94 100644
--- a/sourcecode/scoring/scoring_rules.py
+++ b/sourcecode/scoring/scoring_rules.py
@@ -28,6 +28,7 @@ class RuleID(Enum):
   NM_CRNH = RuleAndVersion("NmCRNH", "1.0", False)
   GENERAL_CRH_INERTIA = RuleAndVersion("GeneralCRHInertia", "1.0", False)
   ELEVATED_CRH_INERTIA = RuleAndVersion("ElevatedCRHInertia", "1.0", False)
+  LCB_INERTIA = RuleAndVersion("LcbCRHInertia", "1.0", False)
   INCORRECT_OUTLIER = RuleAndVersion("FilterIncorrect", "1.0", False)
 
   # Rules used in _meta_score.
@@ -35,6 +36,18 @@ class RuleID(Enum):
   EXPANSION_MODEL = RuleAndVersion("ExpansionModel", "1.1", False)
   CORE_MODEL = RuleAndVersion("CoreModel", "1.1", True)
   COVERAGE_MODEL = RuleAndVersion("CoverageModel", "1.1", False)
+  GROUP_MODEL_1 = RuleAndVersion("GroupModel01", "1.1", False)
+  GROUP_MODEL_2 = RuleAndVersion("GroupModel02", "1.1", False)
+  GROUP_MODEL_3 = RuleAndVersion("GroupModel03", "1.1", False)
+  GROUP_MODEL_4 = RuleAndVersion("GroupModel04", "1.1", False)
+  GROUP_MODEL_5 = RuleAndVersion("GroupModel05", "1.1", False)
+  GROUP_MODEL_6 = RuleAndVersion("GroupModel06", "1.1", False)
+  GROUP_MODEL_7 = RuleAndVersion("GroupModel07", "1.1", False)
+  GROUP_MODEL_8 = RuleAndVersion("GroupModel08", "1.1", False)
+  GROUP_MODEL_9 = RuleAndVersion("GroupModel09", "1.1", False)
+  GROUP_MODEL_10 = RuleAndVersion("GroupModel10", "1.1", False)
+  GROUP_MODEL_11 = RuleAndVersion("GroupModel11", "1.1", False)
+  GROUP_MODEL_12 = RuleAndVersion("GroupModel12", "1.1", False)
   INSUFFICIENT_EXPLANATION = RuleAndVersion("InsufficientExplanation", "1.0", True)
   SCORING_DRIFT_GUARD = RuleAndVersion("ScoringDriftGuard", "1.0", False)
 
@@ -185,56 +198,6 @@ def score_notes(
     return (noteStatusUpdates, None)
 
 
-class ApplyAdditiveModelResult(ScoringRule):
-  def __init__(
-    self,
-    ruleID: RuleID,
-    dependencies: Set[RuleID],
-    sourceColumn: str,
-    coreThreshold: float,
-  ):
-    """Set CRH status based on probationary model subject to safeguard threshold on core model.
-
-    This rule sets CRH note status based on probationary models subject to several criteria:
-      * The note must be have CRH status from the probationary model.
-      * The note must currently be scored as NMR.  This criteria guarantees that (1) probationary
-        models strictly expand coverage and (2) notes which were rated CRH by the core
-        model never have the decidedBy field overwritten by a less confident model.
-      * The note must score above a defined threshold on the core model.  This criteria acts as
-        safeguard against dangerous detections from probationary models.
-
-    Args:
-      rule: enum corresponding to a namedtuple defining a rule name and version string for the ScoringRule.
-      dependencies: Rules which must run before this rule can run.
-      sourceColumn: column containing note status (CRH, CRNH, NMR) to propagate to output.
-      coreThreshold: minimum score which notes must receive from the core model.
-    """
-    super().__init__(ruleID, dependencies)
-    self._sourceColumn = sourceColumn
-    self._coreThreshold = coreThreshold
-
-  def score_notes(
-    self, noteStats: pd.DataFrame, currentLabels: pd.DataFrame, statusColumn: str
-  ) -> (Tuple[pd.DataFrame, Optional[pd.DataFrame]]):
-    """Flip notes from NMR to CRH based on probationary model and subject to core model safeguard."""
-    # Identify notes which meet each of the 3 criteria.
-    probationaryCRHNotes = noteStats[noteStats[self._sourceColumn] == c.currentlyRatedHelpful][
-      [c.noteIdKey]
-    ]
-    currentNMRNotes = currentLabels[currentLabels[statusColumn] == c.needsMoreRatings][
-      [c.noteIdKey]
-    ]
-    aboveSafeGuard = noteStats[noteStats[c.coreNoteInterceptKey] > self._coreThreshold][
-      [c.noteIdKey]
-    ]
-    # Identify overlap and return status update.
-    noteStatusUpdates = probationaryCRHNotes.merge(
-      currentNMRNotes, on=c.noteIdKey, how="inner"
-    ).merge(aboveSafeGuard, on=c.noteIdKey, how="inner")
-    noteStatusUpdates[statusColumn] = c.currentlyRatedHelpful
-    return (noteStatusUpdates, None)
-
-
 class FilterTagOutliers(ScoringRule):
   def __init__(
     self,
@@ -357,6 +320,92 @@ def score_notes(
     return (noteStatusUpdates, None)
 
 
+class ApplyGroupModelResult(ScoringRule):
+  def __init__(
+    self,
+    ruleID: RuleID,
+    dependencies: Set[RuleID],
+    groupNumber: int,
+    coreCrhThreshold: float,
+    expansionCrhThreshold: float,
+    minSafeguardThreshold: float = 0.3,
+  ):
+    """Set CRH status based on a modeling group result.
+
+    This rule sets CRH note status based on group models subject to several criteria:
+      * The note must be have CRH status from the group model.
+      * The note must currently be scored as NMR.  This criteria guarantees that (1) group
+        models strictly expand coverage and (2) notes which were rated CRH by the core
+        model never have the decidedBy field overwritten by a less confident model.
+      * The note must have an intercept from either the core or expansion models, and the intercept
+        of the most confident model must fall within a defined range.  We construct the range
+        to guarantee we can avoid CRHing notes which substantially lacked broad appeal, and
+        to guarantee that we will not CRH a note which was blocked by tag or inaccuracy filtering
+        from either the core or expansion models, as applicable.
+
+    Args:
+      ruleID: enum corresponding to a namedtuple defining a rule name and version string for the ScoringRule.
+      dependencies: Rules which must run before this rule can run.
+      groupNumber: modeling group index which this instance of ApplyGroupModelResult should act on.
+      coreCrhThreshold: maximum intercept allowed on core model for group model CRH notes.
+      expansionCrhThreshold: maximum intercept allowed on expansion model for group model CRH notes.
+      minSafeguardThreshold: minimum intercept for core or expansion model.
+    """
+    super().__init__(ruleID, dependencies)
+    self._groupNumber = groupNumber
+    self._minSafeguardThreshold = minSafeguardThreshold
+    self._coreCrhThreshold = coreCrhThreshold
+    self._expansionCrhThreshold = expansionCrhThreshold
+
+  def score_notes(
+    self, noteStats: pd.DataFrame, currentLabels: pd.DataFrame, statusColumn: str
+  ) -> (Tuple[pd.DataFrame, Optional[pd.DataFrame]]):
+    """Flip notes from NMR to CRH based on group models and subject to core/expansion model safeguards."""
+    # Identify notes which were CRH from the applicable group model.
+    probationaryCRHNotes = noteStats[
+      (noteStats[c.groupRatingStatusKey] == c.currentlyRatedHelpful)
+      & (noteStats[c.modelingGroupKey] == self._groupNumber)
+    ][[c.noteIdKey]]
+    # Identify notes which are currently NMR.
+    currentNMRNotes = currentLabels[currentLabels[statusColumn] == c.needsMoreRatings][
+      [c.noteIdKey]
+    ]
+    # Identify notes which pass score bound checks for expansion and core models.
+    noteStats = noteStats[[c.noteIdKey, c.coreNoteInterceptKey, c.expansionNoteInterceptKey]].copy()
+    noteStats["core"] = (noteStats[c.coreNoteInterceptKey] < self._coreCrhThreshold) & (
+      noteStats[c.coreNoteInterceptKey] > self._minSafeguardThreshold
+    )
+    noteStats.loc[noteStats[c.coreNoteInterceptKey].isna(), "core"] = np.nan
+    noteStats["expansion"] = (
+      noteStats[c.expansionNoteInterceptKey] < self._expansionCrhThreshold
+    ) & (noteStats[c.expansionNoteInterceptKey] > self._minSafeguardThreshold)
+    noteStats.loc[noteStats[c.expansionNoteInterceptKey].isna(), "expansion"] = np.nan
+
+    def _get_value(row):
+      idx = row.first_valid_index()
+      # If either core or expansion had an intercept then return whether it was in the valid
+      # range.  If neither had an intercept, return False.  Preference is given to core due
+      # to the ordering when selecting columns from noteStats below.
+      if idx is None:
+        return False
+      elif row[idx] == 1.0:
+        return True
+      elif row[idx] == 0.0:
+        return False
+      else:
+        assert False, f"unexpected value: {row[idx]}"
+
+    noteStats["actionable"] = noteStats[["core", "expansion"]].apply(_get_value, axis=1)
+    actionableNotes = noteStats[noteStats["actionable"]][[c.noteIdKey]]
+
+    # Identify overlap and return status update.
+    noteStatusUpdates = probationaryCRHNotes.merge(
+      currentNMRNotes, on=c.noteIdKey, how="inner"
+    ).merge(actionableNotes, on=c.noteIdKey, how="inner")
+    noteStatusUpdates[statusColumn] = c.currentlyRatedHelpful
+    return (noteStatusUpdates, None)
+
+
 class InsufficientExplanation(ScoringRule):
   def __init__(
     self,

From fed8763ce29ba62e5af5820e6aad62f8d1bf6ee7 Mon Sep 17 00:00:00 2001
From: Brad Miller <bradm@twitter.com>
Date: Fri, 28 Jul 2023 10:05:13 -0700
Subject: [PATCH 2/2] merge conflict resolutions

---
 sourcecode/scoring/enums.py       |  5 ---
 sourcecode/scoring/run_scoring.py | 63 -------------------------------
 2 files changed, 68 deletions(-)

diff --git a/sourcecode/scoring/enums.py b/sourcecode/scoring/enums.py
index 623dcf0b..2e92df1f 100644
--- a/sourcecode/scoring/enums.py
+++ b/sourcecode/scoring/enums.py
@@ -6,15 +6,10 @@ class Scorers(Enum):
   """Exhaustive list of all scorers to simplify setting enabled/disabled scorers."""
 
   MFCoreScorer = auto()
-<<<<<<< HEAD
   MFExpansionScorer = auto()
   # Note that the MFGroupScorer value controls whether *all* group scorers are instantiated,
   # not just a single MFGroupScorer instance.
   MFGroupScorer = auto()
-=======
-  MFCoverageScorer = auto()
-  MFExpansionScorer = auto()
->>>>>>> main
 
 
 def scorers_from_csv(csv: str) -> Set[Scorers]:
diff --git a/sourcecode/scoring/run_scoring.py b/sourcecode/scoring/run_scoring.py
index e92e9b72..07c258a0 100644
--- a/sourcecode/scoring/run_scoring.py
+++ b/sourcecode/scoring/run_scoring.py
@@ -10,20 +10,12 @@
 from itertools import chain
 import multiprocessing
 import time
-<<<<<<< HEAD
 from typing import Dict, List, Optional, Set, Tuple
-=======
-from typing import List, Optional, Set, Tuple
->>>>>>> main
 
 from . import constants as c, contributor_state, note_ratings, note_status_history, scoring_rules
 from .enums import Scorers
 from .mf_base_scorer import MFBaseScorer
 from .mf_core_scorer import MFCoreScorer
-<<<<<<< HEAD
-=======
-from .mf_coverage_scorer import MFCoverageScorer, MFDummyCoverageScorer
->>>>>>> main
 from .mf_expansion_scorer import MFExpansionScorer
 from .mf_group_scorer import coalesce_group_models, groupScorerCount, MFGroupScorer
 from .scorer import Scorer
@@ -72,10 +64,6 @@ def _check_flips(
   unlockedPopNsh = unlockedPopNsh.merge(
     scoredNotes[[c.noteIdKey, resultCol]], on=c.noteIdKey, how="inner"
   )
-<<<<<<< HEAD
-
-=======
->>>>>>> main
   statusFlips = len(
     unlockedPopNsh.loc[
       (unlockedPopNsh[c.currentLabelKey] != unlockedPopNsh[resultCol])
@@ -89,10 +77,6 @@ def _check_flips(
   prevCrh = max(
     1, len(unlockedPopNsh.loc[unlockedPopNsh[c.currentLabelKey] == c.currentlyRatedHelpful])
   )
-<<<<<<< HEAD
-=======
-
->>>>>>> main
   print(f"Percentage of previous CRH flipping status: {(statusFlips / prevCrh)}")
 
   return (statusFlips / prevCrh) > pctFlips
@@ -100,11 +84,7 @@ def _check_flips(
 
 def _get_scorers(
   seed: Optional[int], pseudoraters: Optional[bool], enabledScorers: Optional[Set[Scorers]]
-<<<<<<< HEAD
 ) -> Dict[Scorers, List[Scorer]]:
-=======
-) -> List[Scorer]:
->>>>>>> main
   """Instantiate all Scorer objects which should be used for note ranking.
 
   Args:
@@ -115,7 +95,6 @@ def _get_scorers(
   Returns:
     Dict[Scorers, List[Scorer]] containing instantiated Scorer objects for note ranking.
   """
-<<<<<<< HEAD
   scorers: Dict[Scorers, List[Scorer]] = dict()
 
   if enabledScorers is None or Scorers.MFCoreScorer in enabledScorers:
@@ -128,18 +107,6 @@ def _get_scorers(
     scorers[Scorers.MFGroupScorer] = [
       MFGroupScorer(groupNumber=i) for i in range(1, groupScorerCount + 1)
     ]
-=======
-  scorers: List[Scorer] = []
-
-  if enabledScorers is None or Scorers.MFCoreScorer in enabledScorers:
-    scorers.append(MFCoreScorer(seed, pseudoraters))
-  if enabledScorers is None or Scorers.MFExpansionScorer in enabledScorers:
-    scorers.append(MFExpansionScorer(seed))
-  if enabledScorers is not None and Scorers.MFCoverageScorer in enabledScorers:
-    scorers.append(MFCoverageScorer(seed))
-  else:
-    scorers.append(MFDummyCoverageScorer())
->>>>>>> main
 
   return scorers
 
@@ -201,15 +168,9 @@ def _run_scorer_parallelizable(scorer, ratings, noteStatusHistory, userEnrollmen
   result = None
   flips = False
 
-<<<<<<< HEAD
   # TODO: encapsulate this at a lower level so we're not accessing private state and can deal
   # with the case where there are no ratings for a scorer to run on after filtering.
   if isinstance(scorer, (MFBaseScorer, MFCoreScorer, MFExpansionScorer)):
-=======
-  if isinstance(
-    scorer, (MFBaseScorer, MFCoreScorer, MFCoverageScorer, MFExpansionScorer)
-  ) and not isinstance(scorer, MFDummyCoverageScorer):
->>>>>>> main
     while (
       (len(scorer._mfRanker.train_errors) == 0)
       or (scorer._mfRanker.train_errors[-1] > c.maxTrainError)
@@ -217,11 +178,8 @@ def _run_scorer_parallelizable(scorer, ratings, noteStatusHistory, userEnrollmen
     ):
       runCounter += 1
       result = ModelResult(*scorer.score(ratings, noteStatusHistory, userEnrollment))
-<<<<<<< HEAD
       if runCounter > c.maxReruns:
         break
-=======
->>>>>>> main
       # check for notes createdat > lock time, not more than x% have changed status from nsh
       if isinstance(scorer, MFExpansionScorer):
         flips = _check_flips(
@@ -232,7 +190,6 @@ def _run_scorer_parallelizable(scorer, ratings, noteStatusHistory, userEnrollmen
           c.expansionFlipPct,
           c.expansionRatingStatusKey,
         )
-<<<<<<< HEAD
       elif isinstance(scorer, MFCoreScorer):
         flips = _check_flips(
           result.scoredNotes,
@@ -245,8 +202,6 @@ def _run_scorer_parallelizable(scorer, ratings, noteStatusHistory, userEnrollmen
       if flips:
         if scorer._seed is not None:
           scorer._seed = scorer._seed + 1
-=======
->>>>>>> main
   else:
     result = ModelResult(*scorer.score(ratings, noteStatusHistory, userEnrollment))
     runCounter += 1
@@ -330,10 +285,7 @@ def _run_scorers(
 
 
 def meta_score(
-<<<<<<< HEAD
   scorers: Dict[Scorers, List[Scorer]],
-=======
->>>>>>> main
   scoredNotes: pd.DataFrame,
   auxiliaryNoteInfo: pd.DataFrame,
   lockedStatus: pd.DataFrame,
@@ -380,7 +332,6 @@ def meta_score(
         RuleID.CORE_MODEL, {RuleID.META_INITIAL_NMR}, c.coreRatingStatusKey
       )
     )
-<<<<<<< HEAD
   if enabledScorers is None or Scorers.MFGroupScorer in enabledScorers:
     # TODO: modify this code to work when MFExpansionScorer is disabled by the system test
     assert len(scorers[Scorers.MFCoreScorer]) == 1
@@ -401,14 +352,6 @@ def meta_score(
           expansionCrhThreshold,
         )
       )
-=======
-  if enabledScorers is not None and Scorers.MFCoverageScorer in enabledScorers:
-    rules.append(
-      scoring_rules.ApplyAdditiveModelResult(
-        RuleID.COVERAGE_MODEL, {RuleID.META_INITIAL_NMR}, c.coverageRatingStatusKey, 0.38
-      )
-    )
->>>>>>> main
   rules.extend(
     [
       scoring_rules.ScoringDriftGuard(
@@ -697,10 +640,7 @@ def run_scoring(
 
   # Assign final status to notes based on individual model scores and note attributes.
   scoredNotesCols, auxiliaryNoteInfoCols = meta_score(
-<<<<<<< HEAD
     scorers,
-=======
->>>>>>> main
     scoredNotes,
     auxiliaryNoteInfo,
     noteStatusHistory[[c.noteIdKey, c.lockedStatusKey]],
@@ -731,10 +671,7 @@ def run_scoring(
   ), "noteStatusHistory should contain all notes after preprocessing"
 
   # Skip validation and selection out output columns if the set of scorers is overridden.
-<<<<<<< HEAD
   scoredNotes = _add_deprecated_columns(scoredNotes)
-=======
->>>>>>> main
   if strictColumns:
     scoredNotes, helpfulnessScores, newNoteStatusHistory, auxiliaryNoteInfo = _validate(
       scoredNotes, helpfulnessScores, newNoteStatusHistory, auxiliaryNoteInfo