diff --git a/sourcecode/scoring/constants.py b/sourcecode/scoring/constants.py
index 331d81bf..c2fce580 100644
--- a/sourcecode/scoring/constants.py
+++ b/sourcecode/scoring/constants.py
@@ -60,6 +60,7 @@
 # Timestamps
 deletedNoteTombstonesLaunchTime = 1652918400000  # May 19, 2022 UTC
 notMisleadingUILaunchTime = 1664755200000  # October 3, 2022 UTC
+lastRatingTagsChangeTimeMillis = 1639699200000  # 2021/12/15 UTC
 publicTSVTimeDelay = 172800000  # 48 hours
 
 # Explanation Tags
@@ -138,6 +139,12 @@ def rater_factor_key(i):
 groupNoteInterceptMinKey = "groupNoteInterceptMin"
 groupRaterInterceptKey = "groupRaterIntercept"
 groupRaterFactor1Key = "groupRaterFactor1"
+# Harassment/Abuse Tag
+harassmentNoteInterceptKey = "harassmentNoteIntercept"
+harassmentNoteFactor1Key = "harassmentNoteFactor1"
+harassmentRaterInterceptKey = "harassmentRaterIntercept"
+harassmentRaterFactor1Key = "harassmentRaterFactor1"
+
 
 # Ids and Indexes
 noteIdKey = "noteId"
@@ -162,6 +169,8 @@ def rater_factor_key(i):
 raterAgreeRatioKey = "raterAgreeRatio"
 ratingAgreesWithNoteStatusKey = "ratingAgreesWithNoteStatus"
 aboveHelpfulnessThresholdKey = "aboveHelpfulnessThreshold"
+totalHelpfulHarassmentRatingsPenaltyKey = "totalHelpfulHarassmentPenalty"
+raterAgreeRatioWithHarassmentAbusePenaltyKey = "raterAgreeRatioKeyWithHarassmentAbusePenalty"
 
 # Note Status Labels
 currentlyRatedHelpful = "CURRENTLY_RATED_HELPFUL"
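The two new rater-level keys back a penalized variant of the rater agreement ratio, computed in helpfulness_scores.py below. A worked example with made-up numbers: the per-rating penalty of 10 matches the default added below, and the 0.66 agreement threshold is assumed to be the scorer's usual minRaterAgreeRatio.

```python
# Illustrative only: a rater with 100 ratings on status-decided notes, 70 agreeing with
# the final status, who also rated two tag-consensus harassment/abuse notes "Helpful"
# (default penalty of 10 per such rating).
ratingCount = 100
ratingAgreesWithNoteStatus = 70
totalHelpfulHarassmentPenalty = 2 * 10

raterAgreeRatio = ratingAgreesWithNoteStatus / ratingCount  # 0.70
raterAgreeRatioWithHarassmentAbusePenalty = (
    ratingAgreesWithNoteStatus - totalHelpfulHarassmentPenalty
) / ratingCount  # 0.50

# Against a minRaterAgreeRatio threshold of 0.66 the unpenalized ratio passes while the
# penalized one fails, so this rater would drop below the helpfulness threshold.
print(raterAgreeRatio, raterAgreeRatioWithHarassmentAbusePenalty)
```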
""" @@ -101,26 +109,67 @@ def compute_general_helpfulness_scores( c.crhCrnhRatioDifferenceKey, c.meanNoteScoreKey, c.raterAgreeRatioKey, + c.ratingAgreesWithNoteStatusKey, + c.ratingCountKey, ] ] ) - helpfulnessScores[c.aboveHelpfulnessThresholdKey] = ( - ( - (helpfulnessScores[c.crhCrnhRatioDifferenceKey] >= minCRHVsCRNHRatio) - & (helpfulnessScores[c.meanNoteScoreKey] >= minMeanNoteScore) + if (ratings is None) or (tagConsensusHarassmentAbuseNotes is None): + helpfulnessScores[c.totalHelpfulHarassmentRatingsPenaltyKey] = 0 + else: + filteredAbuseNotes = tagConsensusHarassmentAbuseNotes[ + tagConsensusHarassmentAbuseNotes[c.harassmentNoteInterceptKey] + >= minimumHarassmentScoreToPenalize + ] + helpfulRatingsOnBadNotes = ratings[ratings[c.helpfulNumKey] == 1].merge( + filteredAbuseNotes, on=c.noteIdKey ) - | ( - pd.isna(helpfulnessScores[c.crhCrnhRatioDifferenceKey]) - & pd.isna(helpfulnessScores[c.meanNoteScoreKey]) + + helpfulRatingsOnBadNotes[ + c.totalHelpfulHarassmentRatingsPenaltyKey + ] = tagConsensusHarassmentHelpfulRatingPenalty + if multiplyPenaltyByHarassmentScore: + helpfulRatingsOnBadNotes[c.totalHelpfulHarassmentRatingsPenaltyKey] *= ( + helpfulRatingsOnBadNotes[c.harassmentNoteInterceptKey] / minimumHarassmentScoreToPenalize + ) + + helpfulRatingsOnBadNotesCount = ( + helpfulRatingsOnBadNotes.groupby(c.raterParticipantIdKey) + .sum()[[c.totalHelpfulHarassmentRatingsPenaltyKey]] + .reset_index() ) - | ( - pd.isna(helpfulnessScores[c.crhCrnhRatioDifferenceKey]) - & helpfulnessScores[c.meanNoteScoreKey] - >= minMeanNoteScore + helpfulnessScores = helpfulnessScores.merge( + helpfulRatingsOnBadNotesCount, on=c.raterParticipantIdKey, how="left" ) - ) & (helpfulnessScores[c.raterAgreeRatioKey] >= minRaterAgreeRatio) + helpfulnessScores[c.totalHelpfulHarassmentRatingsPenaltyKey].fillna(0, inplace=True) + + helpfulnessScores[c.raterAgreeRatioWithHarassmentAbusePenaltyKey] = ( + helpfulnessScores[c.ratingAgreesWithNoteStatusKey] + - helpfulnessScores[c.totalHelpfulHarassmentRatingsPenaltyKey] + ) / helpfulnessScores[c.ratingCountKey] + + helpfulnessScores[c.aboveHelpfulnessThresholdKey] = ( + ( + ( + (helpfulnessScores[c.crhCrnhRatioDifferenceKey] >= minCRHVsCRNHRatio) + & (helpfulnessScores[c.meanNoteScoreKey] >= minMeanNoteScore) + ) + | ( + pd.isna(helpfulnessScores[c.crhCrnhRatioDifferenceKey]) + & pd.isna(helpfulnessScores[c.meanNoteScoreKey]) + ) + | ( + pd.isna(helpfulnessScores[c.crhCrnhRatioDifferenceKey]) + & helpfulnessScores[c.meanNoteScoreKey] + >= minMeanNoteScore + ) + ) + & (helpfulnessScores[c.raterAgreeRatioKey] >= minRaterAgreeRatio) + & (helpfulnessScores[c.raterAgreeRatioWithHarassmentAbusePenaltyKey] >= minRaterAgreeRatio) + ) + helpfulnessScores.drop(columns=[c.ratingCountKey, c.ratingAgreesWithNoteStatusKey], inplace=True) return helpfulnessScores diff --git a/sourcecode/scoring/matrix_factorization/matrix_factorization.py b/sourcecode/scoring/matrix_factorization/matrix_factorization.py index 4c296530..129a9121 100644 --- a/sourcecode/scoring/matrix_factorization/matrix_factorization.py +++ b/sourcecode/scoring/matrix_factorization/matrix_factorization.py @@ -30,6 +30,8 @@ def __init__( model: Optional[BiasedMatrixFactorization] = None, featureCols: List[str] = [c.noteIdKey, c.raterParticipantIdKey], labelCol: str = c.helpfulNumKey, + useSigmoidCrossEntropy=False, + posWeight=None, ) -> None: """Configure matrix factorization note ranking.""" self._l2_lambda = l2_lambda @@ -43,8 +45,25 @@ def __init__( self._flipFactorsForIdentification = 
diff --git a/sourcecode/scoring/matrix_factorization/matrix_factorization.py b/sourcecode/scoring/matrix_factorization/matrix_factorization.py
index 4c296530..129a9121 100644
--- a/sourcecode/scoring/matrix_factorization/matrix_factorization.py
+++ b/sourcecode/scoring/matrix_factorization/matrix_factorization.py
@@ -30,6 +30,8 @@ def __init__(
     model: Optional[BiasedMatrixFactorization] = None,
     featureCols: List[str] = [c.noteIdKey, c.raterParticipantIdKey],
     labelCol: str = c.helpfulNumKey,
+    useSigmoidCrossEntropy=False,
+    posWeight=None,
   ) -> None:
     """Configure matrix factorization note ranking."""
     self._l2_lambda = l2_lambda
@@ -43,8 +45,25 @@ def __init__(
     self._flipFactorsForIdentification = flipFactorsForIdentification
     self._featureCols = featureCols
     self._labelCol = labelCol
+    self._useSigmoidCrossEntropy = useSigmoidCrossEntropy
+    self._posWeight = posWeight
+
+    if self._useSigmoidCrossEntropy:
+      if self._posWeight:
+        if logging:
+          print(f"Using pos weight: {self._posWeight} with BCEWithLogitsLoss")
+        self.criterion = torch.nn.BCEWithLogitsLoss(
+          pos_weight=torch.Tensor(np.array(self._posWeight))
+        )
+      else:
+        if logging:
+          print("Using BCEWithLogitsLoss")
+        self.criterion = torch.nn.BCEWithLogitsLoss()
+    else:
+      if self._posWeight:
+        raise ValueError("posWeight is not supported for MSELoss")
+      self.criterion = torch.nn.MSELoss()
 
-    self.criterion = torch.nn.MSELoss()
     self.train_errors: List[float] = []
     self.test_errors: List[float] = []
     self.mf_model = model
@@ -340,7 +359,6 @@ def _fit_model(
     while (abs(loss.item() - prev_loss) > self._convergence) and (
       not (epoch > 100 and loss.item() > prev_loss)
     ):
-
       prev_loss = loss.item()
 
       # Backpropagate
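The new loss option matters because tag labels are heavily imbalanced: positives for a tag like notHelpfulSpamHarassmentOrAbuse are rare. A standalone sketch with toy labels and logits of the pos_weight that tag_consensus derives as (1 - posRate) / posRate and hands to BCEWithLogitsLoss:

```python
import torch

# Toy labels: ~10% positives, as a sparse tag like notHelpfulSpamHarassmentOrAbuse might be.
labels = torch.tensor([1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
logits = torch.zeros_like(labels)  # an uninformative model that predicts 0.5 for everything

posRate = labels.mean()              # 0.10
posWeight = (1 - posRate) / posRate  # 9.0, as computed in tag_consensus.train_tag_model

unweighted = torch.nn.BCEWithLogitsLoss()
weighted = torch.nn.BCEWithLogitsLoss(pos_weight=posWeight)

# The weighted criterion charges ~9x more for missing the single positive example,
# so errors on the rare class are not washed out by the abundant negatives.
print(unweighted(logits, labels).item(), weighted(logits, labels).item())
```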
diff --git a/sourcecode/scoring/mf_base_scorer.py b/sourcecode/scoring/mf_base_scorer.py
index a7bc988d..e0a0ff91 100644
--- a/sourcecode/scoring/mf_base_scorer.py
+++ b/sourcecode/scoring/mf_base_scorer.py
@@ -1,6 +1,6 @@
 from typing import List, Optional, Tuple
 
-from . import constants as c, helpfulness_scores, note_ratings, process_data
+from . import constants as c, helpfulness_scores, note_ratings, process_data, tag_consensus
 from .matrix_factorization.matrix_factorization import MatrixFactorization
 from .matrix_factorization.pseudo_raters import PseudoRatersRunner
 from .scorer import Scorer
@@ -36,6 +36,7 @@ def __init__(
     crhSuperThreshold: float = 0.5,
     inertiaDelta: float = 0.01,
     useStableInitialization: bool = True,
+    saveIntermediateState: bool = False,
   ):
     """Configure MatrixFactorizationScorer object.
 
@@ -88,6 +89,7 @@ def __init__(
     self._crhSuperThreshold = crhSuperThreshold
     self._inertiaDelta = inertiaDelta
     self._modelingGroupToInitializeForStability = 13 if useStableInitialization else None
+    self._saveIntermediateState = saveIntermediateState
     self._mfRanker = MatrixFactorization()
 
   def get_crh_threshold(self) -> float:
@@ -121,7 +123,10 @@ def get_helpfulness_scores_cols(self) -> List[str]:
 
   def get_auxiliary_note_info_cols(self) -> List[str]:
     """Returns a list of columns which should be present in the auxiliaryNoteInfo output."""
-    return [c.noteIdKey, c.ratingWeightKey,] + (
+    return [
+      c.noteIdKey,
+      c.ratingWeightKey,
+    ] + (
       c.notHelpfulTagsAdjustedColumns
       + c.notHelpfulTagsAdjustedRatioColumns
       + c.incorrectFilterColumns
@@ -281,11 +286,17 @@ def _score_notes_and_users(
     # Removes ratings where either (1) the note did not receive enough ratings, or
     # (2) the rater did not rate enough notes.
     ratingsForTraining = self._prepare_data_for_scoring(ratings)
+    if self._saveIntermediateState:
+      self.ratingsForTraining = ratingsForTraining
 
     # TODO: Save parameters from this first run in note_model_output next time we add extra fields to model output TSV.
     noteParamsUnfiltered, raterParamsUnfiltered, globalBias = self._run_stable_matrix_factorization(
       ratingsForTraining, userEnrollmentRaw
     )
+    if self._saveIntermediateState:
+      self.noteParamsUnfiltered = noteParamsUnfiltered
+      self.raterParamsUnfiltered = raterParamsUnfiltered
+      self.globalBias = globalBias
 
     # Get a dataframe of scored notes based on the algorithm results above
     scoredNotes = note_ratings.compute_scored_notes(
@@ -303,6 +314,8 @@
       crhSuperThreshold=self._crhSuperThreshold,
       inertiaDelta=self._inertiaDelta,
     )
+    if self._saveIntermediateState:
+      self.firstRoundScoredNotes = scoredNotes
 
     # Determine "valid" ratings
     validRatings = note_ratings.get_valid_ratings(
@@ -317,9 +330,48 @@
         ]
       ],
     )
+    if self._saveIntermediateState:
+      self.validRatings = validRatings
 
     # Assigns contributor (author & rater) helpfulness bit based on (1) performance
     # authoring and reviewing previous and current notes.
+    helpfulnessScoresPreHarassmentFilter = helpfulness_scores.compute_general_helpfulness_scores(
+      scoredNotes[
+        [
+          c.noteAuthorParticipantIdKey,
+          c.currentlyRatedHelpfulBoolKey,
+          c.currentlyRatedNotHelpfulBoolKey,
+          c.internalNoteInterceptKey,
+        ]
+      ],
+      validRatings,
+      self._minMeanNoteScore,
+      self._minCRHVsCRNHRatio,
+      self._minRaterAgreeRatio,
+      ratingsForTraining,
+    )
+    if self._saveIntermediateState:
+      self.firstRoundHelpfulnessScores = helpfulnessScoresPreHarassmentFilter
+
+    # Filters ratings matrix to include only rows (ratings) where the rater was
+    # considered helpful.
+    ratingsHelpfulnessScoreFilteredPreHarassmentFilter = (
+      helpfulness_scores.filter_ratings_by_helpfulness_scores(
+        ratingsForTraining, helpfulnessScoresPreHarassmentFilter
+      )
+    )
+    if self._saveIntermediateState:
+      self.ratingsHelpfulnessScoreFilteredPreHarassmentFilter = (
+        ratingsHelpfulnessScoreFilteredPreHarassmentFilter
+      )
+
+    harassmentAbuseNoteParams, _, _ = tag_consensus.train_tag_model(
+      ratingsHelpfulnessScoreFilteredPreHarassmentFilter, c.notHelpfulSpamHarassmentOrAbuseTagKey
+    )
+
+    # Assigns contributor (author & rater) helpfulness bit based on (1) performance
+    # authoring and reviewing previous and current notes, and (2) including an extra
+    # penalty for rating a harassment/abuse note as helpful.
     helpfulnessScores = helpfulness_scores.compute_general_helpfulness_scores(
       scoredNotes[
         [
@@ -333,13 +385,19 @@
       self._minMeanNoteScore,
       self._minCRHVsCRNHRatio,
       self._minRaterAgreeRatio,
+      ratings=ratingsForTraining,
+      tagConsensusHarassmentAbuseNotes=harassmentAbuseNoteParams,
     )
+    if self._saveIntermediateState:
+      self.firstRoundHelpfulnessScores = helpfulnessScores
 
     # Filters ratings matrix to include only rows (ratings) where the rater was
     # considered helpful.
     ratingsHelpfulnessScoreFiltered = helpfulness_scores.filter_ratings_by_helpfulness_scores(
       ratingsForTraining, helpfulnessScores
     )
+    if self._saveIntermediateState:
+      self.ratingsHelpfulnessScoreFiltered = ratingsHelpfulnessScoreFiltered
 
     # Re-runs matrix factorization using only ratings given by helpful raters.
     noteParams, raterParams, globalBias = self._mfRanker.run_mf(
@@ -357,6 +415,10 @@
     else:
       for col in c.noteParameterUncertaintyTSVColumns:
         noteParams[col] = np.nan
+    if self._saveIntermediateState:
+      self.noteParams = noteParams
+      self.raterParams = raterParams
+      self.globalBias = globalBias
 
     # Assigns updated CRH / CRNH bits to notes based on volume of prior ratings
     # and ML output.
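The hunks above turn the rater-helpfulness computation into two passes. Below is a schematic sketch of the new ordering, assuming the sourcecode/scoring package is importable as `scoring` and that the caller already holds the first-round scoredNotes, validRatings, and ratingsForTraining; the helper name is ours, and the thresholds are read off the scorer's private attributes, so this illustrates the call sequence rather than a drop-in utility.

```python
from scoring import constants as c, helpfulness_scores, tag_consensus


def second_pass_helpfulness(scoredNotes, validRatings, ratingsForTraining, scorer):
  """Schematic of the new ordering in _score_notes_and_users (not the scorer itself)."""
  cols = [
    c.noteAuthorParticipantIdKey,
    c.currentlyRatedHelpfulBoolKey,
    c.currentlyRatedNotHelpfulBoolKey,
    c.internalNoteInterceptKey,
  ]
  # Pass 1: helpfulness scores without any harassment penalty.
  prelim = helpfulness_scores.compute_general_helpfulness_scores(
    scoredNotes[cols], validRatings,
    scorer._minMeanNoteScore, scorer._minCRHVsCRNHRatio, scorer._minRaterAgreeRatio,
    ratingsForTraining,
  )
  # Train the tag-consensus harassment/abuse model only on ratings from raters who
  # already look helpful, so the tag labels themselves are less noisy.
  filtered = helpfulness_scores.filter_ratings_by_helpfulness_scores(ratingsForTraining, prelim)
  harassmentNotes, _, _ = tag_consensus.train_tag_model(
    filtered, c.notHelpfulSpamHarassmentOrAbuseTagKey
  )
  # Pass 2: the same computation, now docking raters who rated those notes "Helpful".
  return helpfulness_scores.compute_general_helpfulness_scores(
    scoredNotes[cols], validRatings,
    scorer._minMeanNoteScore, scorer._minCRHVsCRNHRatio, scorer._minRaterAgreeRatio,
    ratings=ratingsForTraining,
    tagConsensusHarassmentAbuseNotes=harassmentNotes,
  )
```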
@@ -376,6 +438,7 @@
       inertiaDelta=self._inertiaDelta,
       finalRound=True,
     )
+
     # Takes raterParams from most recent MF run, but use the pre-computed
     # helpfulness scores.
     helpfulnessScores = raterParams.merge(
@@ -392,4 +455,8 @@
       how="outer",
     )
 
+    if self._saveIntermediateState:
+      self.scoredNotes = scoredNotes
+      self.helpfulnessScores = helpfulnessScores
+
     return scoredNotes, helpfulnessScores
diff --git a/sourcecode/scoring/mf_core_scorer.py b/sourcecode/scoring/mf_core_scorer.py
index ba391169..e285223a 100644
--- a/sourcecode/scoring/mf_core_scorer.py
+++ b/sourcecode/scoring/mf_core_scorer.py
@@ -19,6 +19,7 @@ def __init__(
     pseudoraters: Optional[bool] = False,
     core_threshold: float = 0.5,
     useStableInitialization: bool = True,
+    saveIntermediateState: bool = False,
   ) -> None:
     """Configure MFCoreScorer object.
 
@@ -28,7 +29,12 @@ def __init__(
       core_threshold: float specifying the fraction of reviews which must be from CORE users for a
         note to be in scope for the CORE model.
     """
-    super().__init__(seed, pseudoraters, useStableInitialization=useStableInitialization)
+    super().__init__(
+      seed,
+      pseudoraters,
+      useStableInitialization=useStableInitialization,
+      saveIntermediateState=saveIntermediateState,
+    )
     self._core_threshold = core_threshold
 
   def _get_note_col_mapping(self) -> Dict[str, str]:
diff --git a/sourcecode/scoring/mf_expansion_scorer.py b/sourcecode/scoring/mf_expansion_scorer.py
index 27391dd4..276dd7dc 100644
--- a/sourcecode/scoring/mf_expansion_scorer.py
+++ b/sourcecode/scoring/mf_expansion_scorer.py
@@ -5,13 +5,23 @@
 
 class MFExpansionScorer(MFBaseScorer):
-  def __init__(self, seed: Optional[int] = None, useStableInitialization: bool = True) -> None:
+  def __init__(
+    self,
+    seed: Optional[int] = None,
+    useStableInitialization: bool = True,
+    saveIntermediateState: bool = False,
+  ) -> None:
     """Configure MFExpansionScorer object.
 
     Args:
       seed: if not None, seed value to ensure deterministic execution
     """
-    super().__init__(seed, pseudoraters=False, useStableInitialization=useStableInitialization)
+    super().__init__(
+      seed,
+      pseudoraters=False,
+      useStableInitialization=useStableInitialization,
+      saveIntermediateState=saveIntermediateState,
+    )
 
   def _get_note_col_mapping(self) -> Dict[str, str]:
     """Returns a dict mapping default note column names to custom names for a specific model."""
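saveIntermediateState only stashes each stage on the scorer instance so it can be inspected after a run; nothing else about scoring changes. A hedged construction sketch follows: import paths assume the sourcecode/ directory is on PYTHONPATH, and the attributes listed are the ones assigned in MFBaseScorer._score_notes_and_users, which exist only after a scoring run has executed.

```python
from scoring.mf_core_scorer import MFCoreScorer
from scoring.mf_expansion_scorer import MFExpansionScorer

# Debug or analysis runs can keep every intermediate dataframe on the scorer instance.
coreScorer = MFCoreScorer(seed=0, pseudoraters=False, saveIntermediateState=True)
expansionScorer = MFExpansionScorer(seed=0, saveIntermediateState=True)

# After a scoring run, attributes such as these are populated on the instance:
#   coreScorer.ratingsForTraining, coreScorer.noteParamsUnfiltered,
#   coreScorer.firstRoundScoredNotes, coreScorer.validRatings,
#   coreScorer.ratingsHelpfulnessScoreFiltered, coreScorer.noteParams,
#   coreScorer.scoredNotes, coreScorer.helpfulnessScores
```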
diff --git a/sourcecode/scoring/mf_group_scorer.py b/sourcecode/scoring/mf_group_scorer.py
index aa9d12cb..db529819 100644
--- a/sourcecode/scoring/mf_group_scorer.py
+++ b/sourcecode/scoring/mf_group_scorer.py
@@ -39,6 +39,7 @@ def _coalesce_columns(df: pd.DataFrame, columnPrefix: str) -> pd.DataFrame:
   # Validate that at most one column is set, and store which rows have a column set
   rowResults = np.invert(df[columns].isna()).sum(axis=1)
   assert all(rowResults <= 1), "each row should only be in one modeling group"
+
   # Coalesce results
   def _get_value(row):
     idx = row.first_valid_index()
@@ -91,6 +92,7 @@ def __init__(
     seed: Optional[int] = None,
     pseudoraters: Optional[bool] = False,
     groupThreshold: float = 0.8,
+    saveIntermediateState: bool = False,
   ) -> None:
     """Configure MFGroupScorer object.
 
@@ -108,7 +110,9 @@ def __init__(
       groupThreshold: float indicating what fraction of ratings must be from within a group for the
         model to be active
     """
-    super().__init__(seed, pseudoraters, useStableInitialization=False)
+    super().__init__(
+      seed, pseudoraters, useStableInitialization=False, saveIntermediateState=saveIntermediateState
+    )
     assert groupNumber > 0, "groupNumber must be positive. 0 is reserved for unassigned."
     assert groupNumber <= groupScorerCount, "groupNumber exceeds maximum expected groups."
     self._groupNumber = groupNumber
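The new module below is the tag-consensus trainer that MFBaseScorer calls. A usage sketch of its surface, assuming a ratings dataframe in the usual scoring format has already been loaded elsewhere; the helper name and the 2.5 cutoff (mirroring minimumHarassmentScoreToPenalize) are ours.

```python
from scoring import constants as c, tag_consensus


def flag_harassment_notes(ratings, minScore=2.5):
  """Train the tag-consensus model and return notes above the penalty cutoff (sketch)."""
  noteParams, raterParams, globalBias = tag_consensus.train_tag_model(
    ratings, tag=c.notHelpfulSpamHarassmentOrAbuseTagKey
  )
  if noteParams is None:  # not enough recent data, or a degenerate positive rate
    return None
  # Columns come back renamed from internal* to harassment*, so the note intercept lives in
  # c.harassmentNoteInterceptKey; notes above the cutoff are the ones the Helpful-rating
  # penalty in helpfulness_scores.py targets.
  return noteParams[noteParams[c.harassmentNoteInterceptKey] >= minScore]
```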
diff --git a/sourcecode/scoring/tag_consensus.py b/sourcecode/scoring/tag_consensus.py
new file mode 100644
index 00000000..d6acbf3f
--- /dev/null
+++ b/sourcecode/scoring/tag_consensus.py
@@ -0,0 +1,90 @@
+from . import constants as c, process_data
+from .matrix_factorization.matrix_factorization import MatrixFactorization
+
+import pandas as pd
+
+
+def train_tag_model(
+  ratings: pd.DataFrame,
+  tag: str = c.notHelpfulSpamHarassmentOrAbuseTagKey,
+  useSigmoidCrossEntropy: bool = True,
+):
+  print(f"-------------------Training for tag {tag}-------------------")
+  ratingDataForTag, labelColName = prepare_tag_data(ratings, tag)
+  if ratingDataForTag is None or len(ratingDataForTag) == 0:
+    print(f"No valid data for {tag}, returning None and aborting {tag} model training.")
+    return None, None, None
+
+  posRate = ratingDataForTag[labelColName].sum() / len(ratingDataForTag)
+  print(f"{tag} Positive Rate: {posRate}")
+  if pd.isna(posRate) or posRate == 0 or posRate == 1:
+    print(
+      f"{tag} tag positive rate is {posRate}: returning None and aborting {tag} model training."
+    )
+    return None, None, None
+
+  if useSigmoidCrossEntropy:
+    posWeight = (1 - posRate) / posRate
+  else:
+    posWeight = None
+
+  # Train
+  mf = MatrixFactorization(
+    labelCol=labelColName,
+    useSigmoidCrossEntropy=useSigmoidCrossEntropy,
+    posWeight=posWeight,
+  )
+  noteParams, raterParams, globalBias = mf.run_mf(ratingDataForTag)
+  noteParams.columns = [col.replace("internal", "harassment") for col in noteParams.columns]
+  raterParams.columns = [col.replace("internal", "harassment") for col in raterParams.columns]
+  return noteParams, raterParams, globalBias
+
+
+def prepare_tag_data(
+  allRatings: pd.DataFrame,
+  tagName: str = c.notHelpfulIncorrectTagKey,
+  minNumRatingsPerRater: int = 10,
+  minNumRatersPerNote: int = 5,
+):
+  ratings = allRatings.loc[
+    allRatings[c.createdAtMillisKey] >= c.lastRatingTagsChangeTimeMillis
+  ].copy()
+  if len(ratings) == 0:
+    return None, None
+
+  labelColName = tagName + "Label"
+  ratings.loc[:, labelColName] = None
+
+  if tagName.startswith("helpful"):
+    oppositeValenceHelpfulness = c.notHelpfulValueTsv
+    sameValenceOtherTag = c.helpfulOtherTagKey
+  elif tagName.startswith("notHelpful"):
+    oppositeValenceHelpfulness = c.helpfulValueTsv
+    sameValenceOtherTag = c.notHelpfulOtherTagKey
+  else:
+    raise Exception("Tag unsupported.")
+
+  # Negatives: opposite helpful rating, or same-valence rating with other tag (and no target tag)
+  ratings.loc[ratings[c.helpfulnessLevelKey] == oppositeValenceHelpfulness, labelColName] = 0
+  # Treat a same-valence rating as negative if the tag used was "other"
+  # (other is the only tag uncorrelated enough with the remaining tags).
+  # Will be overridden to 1 below if the target tag itself was also used.
+  ratings.loc[ratings[sameValenceOtherTag] == 1, labelColName] = 0
+
+  # Positives
+  ratings.loc[ratings[tagName] == 1, labelColName] = 1
+
+  print("Pre-filtering tag label breakdown", ratings.groupby(labelColName).size())
+  print("Number of rows with no tag label", ratings[labelColName].isnull().sum())
+
+  # Currently leave in raters who only made one type of rating, but can throw them out in the future.
+  ratings = process_data.filter_ratings(
+    ratings[ratings[labelColName].notnull()], minNumRatingsPerRater, minNumRatersPerNote
+  )
+
+  print("Post-filtering tag label breakdown", ratings.groupby(labelColName).size())
+  print("Number of rows with no tag label", ratings[labelColName].isnull().sum())
+
+  ratings[labelColName] = ratings[labelColName].astype(int)
+
+  return ratings, labelColName
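For reference, the labeling rules in prepare_tag_data reduce to three cases, shown here on toy rows (column names and the HELPFUL / NOT_HELPFUL values are assumed to mirror the public ratings TSV): an opposite-valence rating is a negative, a same-valence rating that only used the Other tag is a negative, and a rating that used the target tag is a positive; everything else is dropped before training.

```python
import pandas as pd

# Toy ratings for the notHelpfulSpamHarassmentOrAbuse target tag; rows are made up.
ratings = pd.DataFrame(
    {
        "helpfulnessLevel": ["HELPFUL", "NOT_HELPFUL", "NOT_HELPFUL", "SOMEWHAT_HELPFUL"],
        "notHelpfulOther": [0, 1, 0, 0],
        "notHelpfulSpamHarassmentOrAbuse": [0, 0, 1, 0],
    }
)

label = pd.Series(pd.NA, index=ratings.index, dtype="object")
# Negatives: an opposite-valence (Helpful) rating, or a not-helpful rating that only used "Other".
label[ratings["helpfulnessLevel"] == "HELPFUL"] = 0
label[ratings["notHelpfulOther"] == 1] = 0
# Positives: the target tag itself, overriding anything set above.
label[ratings["notHelpfulSpamHarassmentOrAbuse"] == 1] = 1

print(label.tolist())  # [0, 0, 1, <NA>] -- the unlabeled last row is dropped before training
```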