Algorithm updates #264

Merged
merged 1 commit into from Sep 17, 2024
19 changes: 12 additions & 7 deletions sourcecode/scoring/matrix_factorization/matrix_factorization.py
@@ -30,7 +30,6 @@ def __init__(
     numFactors=1,
     useGlobalIntercept=True,
     log=True,
-    flipFactorsForIdentification=True,
     model: Optional[BiasedMatrixFactorization] = None,
     featureCols: List[str] = [c.noteIdKey, c.raterParticipantIdKey],
     labelCol: str = c.helpfulNumKey,
@@ -51,7 +50,6 @@ def __init__(
     self._numFactors = numFactors
     self._useGlobalIntercept = useGlobalIntercept
     self._log = log
-    self._flipFactorsForIdentification = flipFactorsForIdentification
     self._featureCols = featureCols
     self._labelCol = labelCol
     self._useSigmoidCrossEntropy = useSigmoidCrossEntropy
@@ -103,7 +101,6 @@ def get_new_mf_with_same_args(self):
       numFactors=self._numFactors,
       useGlobalIntercept=self._useGlobalIntercept,
       log=self._log,
-      flipFactorsForIdentification=self._flipFactorsForIdentification,
       model=None,
       featureCols=self._featureCols,
       labelCol=self._labelCol,
@@ -225,7 +222,9 @@ def _initialize_parameters(
         torch.ones(1, 1, dtype=torch.float32) * globalInterceptInit
       )

-  def _get_parameters_from_trained_model(self) -> Tuple[pd.DataFrame, pd.DataFrame]:
+  def _get_parameters_from_trained_model(
+    self, flipFactorsForIdentification: bool = True
+  ) -> Tuple[pd.DataFrame, pd.DataFrame]:
     """
     Returns:
       Tuple[pd.DataFrame, pd.DataFrame]: noteIdMap, raterIdMap
@@ -247,7 +246,7 @@ def _get_parameters_from_trained_model(self) -> Tuple[pd.DataFrame, pd.DataFrame
         :, i
       ]

-    if self._flipFactorsForIdentification:
+    if flipFactorsForIdentification:
       noteParams, raterParams = self._flip_factors_for_identification(noteParams, raterParams)

     return noteParams, raterParams
@@ -497,10 +496,12 @@ def run_mf(
     globalInterceptInit: Optional[float] = None,
     specificNoteId: Optional[int] = None,
     validatePercent: Optional[float] = None,
+    freezeNoteParameters: bool = False,
     freezeRaterParameters: bool = False,
     freezeGlobalParameters: bool = False,
     ratingPerNoteLossRatio: Optional[float] = None,
     ratingPerUserLossRatio: Optional[float] = None,
+    flipFactorsForIdentification: bool = True,
   ):
     """Train matrix factorization model.
@@ -546,13 +547,15 @@
       self.mf_model._freeze_parameters(set({"user"}))
     if freezeGlobalParameters:
       self.mf_model._freeze_parameters(set({"global"}))
+    if freezeNoteParameters:
+      self.mf_model._freeze_parameters(set({"note"}))
     if specificNoteId is not None:
       self.mf_model.freeze_rater_and_global_parameters()
     self.prepare_features_and_labels(specificNoteId)

     train_loss, loss, validate_loss = self._fit_model(validatePercent)
     if self._normalizedLossHyperparameters is not None:
-      _, raterParams = self._get_parameters_from_trained_model()
+      _, raterParams = self._get_parameters_from_trained_model(flipFactorsForIdentification)
       assert self.modelData is not None
       self._lossModule = NormalizedLoss(
         self.criterion,
@@ -575,7 +578,9 @@
     if self._log:
       logger.info(f"Global Intercept: {globalIntercept}")

-    fitNoteParams, fitRaterParams = self._get_parameters_from_trained_model()
+    fitNoteParams, fitRaterParams = self._get_parameters_from_trained_model(
+      flipFactorsForIdentification
+    )

     fitRaterParams.drop(Constants.raterIndexKey, axis=1, inplace=True)
     if validatePercent is None:
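Context for the change above: the flip is now requested when parameters are read out (an argument to _get_parameters_from_trained_model and run_mf) instead of being fixed at construction time. The flip exists because the factor signs in a one-factor matrix factorization are not identified: negating both the note and rater factor vectors leaves every prediction unchanged, so a sign convention has to be imposed after training. A minimal standalone illustration of that indeterminacy in plain numpy (not the repository's _flip_factors_for_identification implementation):

import numpy as np

rng = np.random.default_rng(0)
noteFactors = rng.normal(size=5)
raterFactors = rng.normal(size=7)
noteBias = rng.normal(size=5)
raterBias = rng.normal(size=7)
globalIntercept = 0.1

# Biased one-factor MF prediction: intercept + noteBias + raterBias + noteFactor * raterFactor.
pred = globalIntercept + noteBias[:, None] + raterBias[None, :] + np.outer(noteFactors, raterFactors)

# Negating both factor vectors produces exactly the same predictions, which is
# why a post-hoc sign convention ("flip for identification") is needed at all.
predFlipped = globalIntercept + noteBias[:, None] + raterBias[None, :] + np.outer(-noteFactors, -raterFactors)

assert np.allclose(pred, predFlipped)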
8 changes: 6 additions & 2 deletions sourcecode/scoring/matrix_factorization/pseudo_raters.py
@@ -87,7 +87,9 @@ def _check_note_parameters_same(self, newMatrixFactorizatio
     (
       noteParamsFromNewModel,
       raterParamsFromNewModel,
-    ) = newMatrixFactorization._get_parameters_from_trained_model()
+    ) = newMatrixFactorization._get_parameters_from_trained_model(
+      flipFactorsForIdentification=False
+    )
     assert (noteParamsFromNewModel == self.noteParams).all().all()

   def _make_extreme_raters(self, raterParams: pd.DataFrame, raterIdMap: pd.DataFrame):
@@ -206,7 +208,9 @@ def _fit_all_notes_with_raters_constant(self, ratingFeaturesAndLabelsWithExtreme
     if self._checkParamsSame:
       self._check_rater_parameters_same(newExtremeMF)

-    fitNoteParams, fitRaterParams = newExtremeMF._get_parameters_from_trained_model()
+    fitNoteParams, fitRaterParams = newExtremeMF._get_parameters_from_trained_model(
+      flipFactorsForIdentification=False
+    )
     return fitNoteParams

   def _create_extreme_ratings(self):
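The flipFactorsForIdentification=False calls matter here because _check_note_parameters_same compares the refit note parameters against the originals with exact equality; applying the sign convention to only one of the two fits would make two equivalent factorizations compare unequal. A toy sketch of that failure mode, using minimal hypothetical DataFrames rather than the scorer's full schema:

import pandas as pd

frozenNoteParams = pd.DataFrame({"noteId": [1, 2], "internalNoteFactor1": [0.4, -0.2]})
refitNoteParams = frozenNoteParams.copy()

# Same orientation: an element-wise equality check like the assert above passes.
assert (refitNoteParams == frozenNoteParams).all().all()

# An equivalent model whose factors happen to carry the opposite sign convention
# fails the same check, even though its predictions are identical.
flippedNoteParams = refitNoteParams.assign(
  internalNoteFactor1=-refitNoteParams["internalNoteFactor1"]
)
assert not (flippedNoteParams == frozenNoteParams).all().all()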
3 changes: 3 additions & 0 deletions sourcecode/scoring/mf_base_scorer.py
@@ -907,6 +907,7 @@ def _score_notes_and_users(
     prescoringNoteModelOutput: pd.DataFrame,
     prescoringRaterModelOutput: pd.DataFrame,
     prescoringMetaScorerOutput: c.PrescoringMetaScorerOutput,
+    flipFactorsForIdentification: bool = False,
   ) -> Tuple[pd.DataFrame, pd.DataFrame]:
     """Run the "final" matrix factorization scoring algorithm.
     Accepts prescoring's output as its input, as well as the new ratings and note status history.
@@ -983,6 +984,7 @@
         freezeGlobalParameters=True,
         ratingPerNoteLossRatio=prescoringMetaScorerOutput.finalRoundNumRatings
         / prescoringMetaScorerOutput.finalRoundNumNotes,
+        flipFactorsForIdentification=flipFactorsForIdentification,
       )

     if self._saveIntermediateState:
@@ -1139,6 +1141,7 @@ def score_final(self, scoringArgs: c.FinalScoringArgs) -> c.ModelResult:
         prescoringNoteModelOutput=prescoringNoteModelOutput,
         prescoringRaterModelOutput=prescoringRaterModelOutput,
         prescoringMetaScorerOutput=prescoringMetaScorerOutput,
+        flipFactorsForIdentification=False,
       )

     with self.time_block("Postprocess output"):
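A plausible reading of why the final scoring round passes flipFactorsForIdentification=False (an interpretation, not something stated in this diff): the final round freezes rater and global parameters at the orientation they were given during prescoring, and re-applying the sign convention when parameters are read out would flip the factors away from that orientation even though predictions are unchanged. A small numpy illustration with hypothetical factor values:

import numpy as np

prescoringRaterFactors = np.array([0.8, -0.3, 0.1])  # frozen at their prescoring orientation
finalRoundNoteFactors = np.array([0.5, -0.7])         # refit in the final round

# Flipping both sides leaves every prediction term unchanged ...
interaction = np.outer(finalRoundNoteFactors, prescoringRaterFactors)
interactionFlipped = np.outer(-finalRoundNoteFactors, -prescoringRaterFactors)
assert np.allclose(interaction, interactionFlipped)

# ... but the emitted rater factors would then disagree in sign with the
# prescoring output they were frozen from, so the flip is skipped here.
assert not np.allclose(-prescoringRaterFactors, prescoringRaterFactors)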
4 changes: 4 additions & 0 deletions sourcecode/scoring/note_status_history.py
@@ -157,6 +157,10 @@ def _update_single_note_status_history(mergedNote, currentTimeMillis, newScoredN
     mergedNote[c.lockedStatusKey] = mergedNote[c.finalRatingStatusKey]
     mergedNote[c.timestampMillisOfStatusLockKey] = currentTimeMillis

+  # Clear timestampMillisOfNmrDueToMinStableCrhTimeKey if the note is locked.
+  if pd.notna(mergedNote[c.lockedStatusKey]):
+    mergedNote[c.timestampMillisOfNmrDueToMinStableCrhTimeKey] = -1
+
   if pd.isna(mergedNote[c.createdAtMillisKey + newScoredNotesSuffix]):
     # note used to be scored but isn't now; just retain old info
     return mergedNote
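The added block in _update_single_note_status_history resets the NMR-due-to-min-stable-CRH-time timestamp to -1 once a note's status is locked. A toy pandas sketch of the same rule applied across a frame, with column names echoing the constants above (the exact schema here is an assumption):

import pandas as pd

notes = pd.DataFrame({
  "lockedStatus": ["CURRENTLY_RATED_HELPFUL", None],
  "timestampMillisOfNmrDueToMinStableCrhTime": [1726500000000, 1726500000000],
})

# Mirror the added lines: clear the stabilization timestamp only where a locked status exists.
locked = notes["lockedStatus"].notna()
notes.loc[locked, "timestampMillisOfNmrDueToMinStableCrhTime"] = -1

# The locked note is reset to -1; the unlocked note keeps its original timestamp.
print(notes)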
5 changes: 3 additions & 2 deletions sourcecode/scoring/pandas_utils.py
@@ -56,10 +56,11 @@ def get_df_info(
     .reset_index(drop=False)
     .rename(columns={"index": "column", 0: "RAM"})
   )
+  ramBytes = stats["RAM"].sum()
   if name is not None:
-    lines = [f"""{name} total RAM: {stats["RAM"].sum()}"""]
+    lines = [f"""{name} total RAM: {ramBytes} bytes ({ramBytes * 1e-9:.3f} GB)"""]
   else:
-    lines = [f"""total RAM: {stats["RAM"].sum()}"""]
+    lines = [f"""total RAM: {ramBytes} bytes ({ramBytes * 1e-9:.3f} GB)"""]
   lines.extend(str(stats).split("\n"))
   if counter:
     for col, dtype in zip(stats["column"], stats["dtype"]):
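For reference, the updated RAM line in get_df_info can be exercised on its own. A minimal sketch with a toy DataFrame (not the scorer's data); pandas' memory_usage(deep=True) reports bytes per column, which is what the RAM column built above sums:

import pandas as pd

df = pd.DataFrame({"noteId": range(100_000), "score": [0.5] * 100_000})

# Total bytes, then the same value in (decimal) gigabytes, formatted as in the new output.
ramBytes = df.memory_usage(deep=True).sum()
print(f"total RAM: {ramBytes} bytes ({ramBytes * 1e-9:.3f} GB)")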