Merge pull request #257 from twitter/jbaxter/2024_08_21

New NSH columns and cleanup
twitter · Aug 21, 2024 · 2d6d6a5
2 parents 956d8bd + 167be83
commit 2d6d6a5
Show file tree

Hide file tree

Showing 22 changed files with 421 additions and 303 deletions.
diff --git a/sourcecode/scoring/constants.py b/sourcecode/scoring/constants.py
@@ -1,6 +1,7 @@
 from contextlib import contextmanager
 from dataclasses import dataclass
 from enum import Enum
+import logging
 import os
 import time
 from typing import Dict, Optional, Set
@@ -9,6 +10,10 @@
 import pandas as pd
 
 
+logger = logging.getLogger("birdwatch.constants")
+logger.setLevel(logging.INFO)
+
+
 # Default number of threads to use in torch if os.cpu_count() is unavailable
 # and no value is specified.
 defaultNumThreads = os.cpu_count() or 8
@@ -461,6 +466,8 @@ def rater_factor_key(i):
 updatedTimestampMillisOfNmrDueToMinStableCrhTimeKey = (
   "updatedTimestampMillisOfNmrDueToMinStableCrhTime"
 )
+timestampMinuteOfFinalScoringOutput = "timestampMinuteOfFinalScoringOutput"
+timestampMillisOfFirstNmrDueToMinStableCrhTimeKey = "timestampMillisOfFirstNmrDueToMinStableCrhTime"
 
 noteStatusHistoryTSVColumnsAndTypes = [
   (noteIdKey, np.int64),
@@ -484,6 +491,8 @@ def rater_factor_key(i):
   (timestampMillisOfNmrDueToMinStableCrhTimeKey, np.double),  # double because nullable.
   (currentMultiGroupStatusKey, "category"),
   (currentModelingMultiGroupKey, np.double),  # TODO: int
+  (timestampMinuteOfFinalScoringOutput, np.double),  # double because nullable.
+  (timestampMillisOfFirstNmrDueToMinStableCrhTimeKey, np.double),  # double because nullable.
 ]
 noteStatusHistoryTSVColumns = [col for (col, dtype) in noteStatusHistoryTSVColumnsAndTypes]
 noteStatusHistoryTSVTypes = [dtype for (col, dtype) in noteStatusHistoryTSVColumnsAndTypes]
@@ -818,8 +827,6 @@ def rater_factor_key(i):
 inputPathsTSVColumns = [col for (col, _) in inputPathsTSVColumnsAndTypes]
 inputPathsTSVTypeMapping = {col: dtype for (col, dtype) in inputPathsTSVColumnsAndTypes}
 
-timestampMinuteOfFinalScoringOutput = "timestampMinuteOfFinalScoringOutput"
-
 
 @contextmanager
 def time_block(label):
@@ -828,7 +835,7 @@ def time_block(label):
     yield
   finally:
     end = time.time()
-    print(f"{label} elapsed time: {end - start:.2f} secs ({((end - start) / 60.0):.2f} mins)")
+    logger.info(f"{label} elapsed time: {end - start:.2f} secs ({((end - start) / 60.0):.2f} mins)")
 
 
 ### TODO: weave through second round intercept.

diff --git a/sourcecode/scoring/contributor_state.py b/sourcecode/scoring/contributor_state.py
@@ -1,10 +1,16 @@
+import logging
+
 from . import constants as c, explanation_tags
 from .helpfulness_scores import author_helpfulness
 from .note_ratings import get_ratings_with_scores, get_valid_ratings
 
 import pandas as pd
 
 
+logger = logging.getLogger("birdwatch.contributor_state")
+logger.setLevel(logging.INFO)
+
+
 def should_earn_in(contributorScoresWithEnrollment: pd.DataFrame):
   """
   The participant should earn in when they are in the earnedOutAcknowledged, earnedoutNoAck and newUser state.
@@ -124,21 +130,21 @@ def _get_rated_after_decision(
   assert (
     len(ratingInfos) == len(ratings)
   ), f"assigning a status timestamp shouldn't decrease number of ratings: {len(ratingInfos)} vs. {len(ratings)}"
-  print("Calculating ratedAfterDecision:")
-  print(f"  Total ratings: {len(ratingInfos)}")
+  logger.info("Calculating ratedAfterDecision:")
+  logger.info(f"  Total ratings: {len(ratingInfos)}")
   ratingInfos = ratingInfos[~pd.isna(ratingInfos[c.timestampMillisOfNoteMostRecentNonNMRLabelKey])]
-  print(f"  Total ratings on notes with status: {len(ratingInfos)}")
+  logger.info(f"  Total ratings on notes with status: {len(ratingInfos)}")
   ratingInfos = ratingInfos[
     ratingInfos[c.createdAtMillisKey] > ratingInfos[c.timestampMillisOfNoteMostRecentNonNMRLabelKey]
   ]
-  print(f"  Total ratings after status: {len(ratingInfos)}")
+  logger.info(f"  Total ratings after status: {len(ratingInfos)}")
   ratingInfos[c.ratedAfterDecision] = 1
   ratedAfterDecision = (
     ratingInfos[[c.raterParticipantIdKey, c.ratedAfterDecision]]
     .groupby(c.raterParticipantIdKey)
     .sum()
   )
-  print(f"  Total raters rating after decision: {len(ratedAfterDecision)}")
+  logger.info(f"  Total raters rating after decision: {len(ratedAfterDecision)}")
   return ratedAfterDecision
 
 
@@ -421,7 +427,7 @@ def get_contributor_state(
   ratings: pd.DataFrame,
   noteStatusHistory: pd.DataFrame,
   userEnrollment: pd.DataFrame,
-  logging: bool = True,
+  log: bool = True,
 ) -> pd.DataFrame:
   """
   Given scored notes, ratings, note status history, the current user enrollment state, this
@@ -433,7 +439,7 @@ def get_contributor_state(
       ratings (pd.DataFrame): all ratings
       noteStatusHistory (pd.DataFrame): history of note statuses
       userEnrollment (pd.DataFrame): User enrollment for BW participants.
-      logging (bool): Should we log
+      log (bool): Should we log
   Returns:
       pd.DataFrame: contributorScoresWithEnrollment The contributor scores with enrollments
   """
@@ -582,27 +588,22 @@ def get_contributor_state(
     # users that do not have an id.
     contributorScoresWithEnrollment.dropna(subset=[c.raterParticipantIdKey], inplace=True)
 
-  if logging:
-    print("Enrollment State")
-    print(
-      "Number of Earned In",
-      len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 0]),
+  if log:
+    logger.info("Enrollment State")
+    logger.info(
+      f"Number of Earned In {len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 0])}"
     )
-    print(
-      "Number At Risk",
-      len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 1]),
+    logger.info(
+      f"Number At Risk {len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 1])}"
     )
-    print(
-      "Number of Earn Out No Ack",
-      len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 2]),
+    logger.info(
+      f"Number of Earn Out No Ack {len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 2])}"
     )
-    print(
-      "Number of Earned Out Ack",
-      len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 3]),
+    logger.info(
+      f"Number of Earned Out Ack {len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 3])}"
     )
-    print(
-      "Number of New Users",
-      len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 4]),
+    logger.info(
+      f"Number of New Users {len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 4])}"
     )
 
   return contributorScoresWithEnrollment, mappedUserEnrollment
@@ -615,7 +616,7 @@ def get_contributor_scores(
   lastNNotes=-1,
   countNMRNotesLast: bool = False,
   sinceLastEarnOut: bool = False,
-  logging: bool = True,
+  log: bool = True,
 ) -> pd.DataFrame:
   """
   Given the outputs of the MF model, this function aggregates stats over notes and ratings. The
@@ -628,7 +629,7 @@ def get_contributor_scores(
       lastNNotes (int): count over the last n notes
       countNMRNotesLast (bool): count NMR notes last. Useful when you want to calculate over a limited set of CRH + CRNH notes
       sinceLastEarnOut: only count notes since last Earn Out event
-      logging (bool): Should we log?
+      log (bool): Should we log?
   Returns:
       pd.DataFrame: contributorScores - rating + note aggregates per contributor.
   """
@@ -676,7 +677,7 @@ def get_contributor_scores(
     ]
   )
 
-  if logging:
-    print("Number Contributor Counts: ", len(contributorCounts))
+  if log:
+    logger.info(f"Number Contributor Counts: {len(contributorCounts)}")
 
   return contributorCounts
diff --git a/sourcecode/scoring/helpfulness_scores.py b/sourcecode/scoring/helpfulness_scores.py
@@ -1,3 +1,4 @@
+import logging
 from typing import Optional
 
 from . import constants as c
@@ -6,6 +7,10 @@
 import pandas as pd
 
 
+logger = logging.getLogger("birdwatch.helpfulness_scores")
+logger.setLevel(logging.INFO)
+
+
 def author_helpfulness(
   scoredNotes: pd.DataFrame,
   noteInterceptKey: str,
@@ -199,15 +204,15 @@ def compute_general_helpfulness_scores(
 def filter_ratings_by_helpfulness_scores(
   ratingsForTraining: pd.DataFrame,
   helpfulnessScores: pd.DataFrame,
-  logging: bool = True,
+  log: bool = True,
 ):
   """Filter out ratings from raters whose helpfulness scores are too low.
   See https://twitter.github.io/communitynotes/contributor-scores/#filtering-ratings-based-on-helpfulness-scores.
 
   Args:
       ratingsForTraining pandas.DataFrame: unfiltered input ratings
       helpfulnessScores pandas.DataFrame: helpfulness scores to use to determine which raters to filter out.
-      logging (bool, optional): debug output. Defaults to True.
+      log (bool, optional): debug output. Defaults to True.
 
   Returns:
       filtered_ratings pandas.DataFrame: same schema as input ratings, but filtered.
@@ -219,15 +224,14 @@ def filter_ratings_by_helpfulness_scores(
     ratingsForTraining, on=c.raterParticipantIdKey
   )
 
-  if logging:
-    print("Unique Raters: ", len(np.unique(ratingsForTraining[c.raterParticipantIdKey])))
-    print("People (Authors or Raters) With Helpfulness Scores: ", len(helpfulnessScores))
-    print("Raters Included Based on Helpfulness Scores: ", len(includedUsers))
-    print(
-      "Included Raters who have rated at least 1 note in the final dataset: ",
-      len(np.unique(ratingsHelpfulnessScoreFiltered[c.raterParticipantIdKey])),
+  if log:
+    logger.info(f"Unique Raters: {len(np.unique(ratingsForTraining[c.raterParticipantIdKey]))}")
+    logger.info(f"People (Authors or Raters) With Helpfulness Scores: {len(helpfulnessScores)}")
+    logger.info(f"Raters Included Based on Helpfulness Scores: {len(includedUsers)}")
+    logger.info(
+      f"Included Raters who have rated at least 1 note in the final dataset: {len(np.unique(ratingsHelpfulnessScoreFiltered[c.raterParticipantIdKey]))}",
     )
-    print("Number of Ratings Used For 1st Training: ", len(ratingsForTraining))
-    print("Number of Ratings for Final Training: ", len(ratingsHelpfulnessScoreFiltered))
+    logger.info(f"Number of Ratings Used For 1st Training: {len(ratingsForTraining)}")
+    logger.info(f"Number of Ratings for Final Training: {len(ratingsHelpfulnessScoreFiltered)}")
 
   return ratingsHelpfulnessScoreFiltered