Skip to content

Commit

Permalink
Merge pull request #257 from twitter/jbaxter/2024_08_21
Browse files (browse the repository at this point in the history)
New NSH columns and cleanup
  • Loading branch information
jbaxter authored Aug 21, 2024
2 parents 956d8bd + 167be83 commit 2d6d6a5
Show file tree
Hide file tree
Showing 22 changed files with 421 additions and 303 deletions.
13 changes: 10 additions & 3 deletions sourcecode/scoring/constants.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from contextlib import contextmanager
from dataclasses import dataclass
from enum import Enum
import logging
import os
import time
from typing import Dict, Optional, Set
Expand All @@ -9,6 +10,10 @@
import pandas as pd


logger = logging.getLogger("birdwatch.constants")
logger.setLevel(logging.INFO)


# Default number of threads to use in torch if os.cpu_count() is unavailable
# and no value is specified.
defaultNumThreads = os.cpu_count() or 8
Expand Down Expand Up @@ -461,6 +466,8 @@ def rater_factor_key(i):
updatedTimestampMillisOfNmrDueToMinStableCrhTimeKey = (
"updatedTimestampMillisOfNmrDueToMinStableCrhTime"
)
timestampMinuteOfFinalScoringOutput = "timestampMinuteOfFinalScoringOutput"
timestampMillisOfFirstNmrDueToMinStableCrhTimeKey = "timestampMillisOfFirstNmrDueToMinStableCrhTime"

noteStatusHistoryTSVColumnsAndTypes = [
(noteIdKey, np.int64),
Expand All @@ -484,6 +491,8 @@ def rater_factor_key(i):
(timestampMillisOfNmrDueToMinStableCrhTimeKey, np.double), # double because nullable.
(currentMultiGroupStatusKey, "category"),
(currentModelingMultiGroupKey, np.double), # TODO: int
(timestampMinuteOfFinalScoringOutput, np.double), # double because nullable.
(timestampMillisOfFirstNmrDueToMinStableCrhTimeKey, np.double), # double because nullable.
]
noteStatusHistoryTSVColumns = [col for (col, dtype) in noteStatusHistoryTSVColumnsAndTypes]
noteStatusHistoryTSVTypes = [dtype for (col, dtype) in noteStatusHistoryTSVColumnsAndTypes]
Expand Down Expand Up @@ -818,8 +827,6 @@ def rater_factor_key(i):
inputPathsTSVColumns = [col for (col, _) in inputPathsTSVColumnsAndTypes]
inputPathsTSVTypeMapping = {col: dtype for (col, dtype) in inputPathsTSVColumnsAndTypes}

timestampMinuteOfFinalScoringOutput = "timestampMinuteOfFinalScoringOutput"


@contextmanager
def time_block(label):
Expand All @@ -828,7 +835,7 @@ def time_block(label):
yield
finally:
end = time.time()
print(f"{label} elapsed time: {end - start:.2f} secs ({((end - start) / 60.0):.2f} mins)")
logger.info(f"{label} elapsed time: {end - start:.2f} secs ({((end - start) / 60.0):.2f} mins)")


### TODO: weave through second round intercept.
Expand Down
57 changes: 29 additions & 28 deletions sourcecode/scoring/contributor_state.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import logging

from . import constants as c, explanation_tags
from .helpfulness_scores import author_helpfulness
from .note_ratings import get_ratings_with_scores, get_valid_ratings

import pandas as pd


logger = logging.getLogger("birdwatch.contributor_state")
logger.setLevel(logging.INFO)


def should_earn_in(contributorScoresWithEnrollment: pd.DataFrame):
"""
The participant should earn in when they are in the earnedOutAcknowledged, earnedoutNoAck and newUser state.
Expand Down Expand Up @@ -124,21 +130,21 @@ def _get_rated_after_decision(
assert (
len(ratingInfos) == len(ratings)
), f"assigning a status timestamp shouldn't decrease number of ratings: {len(ratingInfos)} vs. {len(ratings)}"
print("Calculating ratedAfterDecision:")
print(f" Total ratings: {len(ratingInfos)}")
logger.info("Calculating ratedAfterDecision:")
logger.info(f" Total ratings: {len(ratingInfos)}")
ratingInfos = ratingInfos[~pd.isna(ratingInfos[c.timestampMillisOfNoteMostRecentNonNMRLabelKey])]
print(f" Total ratings on notes with status: {len(ratingInfos)}")
logger.info(f" Total ratings on notes with status: {len(ratingInfos)}")
ratingInfos = ratingInfos[
ratingInfos[c.createdAtMillisKey] > ratingInfos[c.timestampMillisOfNoteMostRecentNonNMRLabelKey]
]
print(f" Total ratings after status: {len(ratingInfos)}")
logger.info(f" Total ratings after status: {len(ratingInfos)}")
ratingInfos[c.ratedAfterDecision] = 1
ratedAfterDecision = (
ratingInfos[[c.raterParticipantIdKey, c.ratedAfterDecision]]
.groupby(c.raterParticipantIdKey)
.sum()
)
print(f" Total raters rating after decision: {len(ratedAfterDecision)}")
logger.info(f" Total raters rating after decision: {len(ratedAfterDecision)}")
return ratedAfterDecision


Expand Down Expand Up @@ -421,7 +427,7 @@ def get_contributor_state(
ratings: pd.DataFrame,
noteStatusHistory: pd.DataFrame,
userEnrollment: pd.DataFrame,
logging: bool = True,
log: bool = True,
) -> pd.DataFrame:
"""
Given scored notes, ratings, note status history, the current user enrollment state, this
Expand All @@ -433,7 +439,7 @@ def get_contributor_state(
ratings (pd.DataFrame): all ratings
noteStatusHistory (pd.DataFrame): history of note statuses
userEnrollment (pd.DataFrame): User enrollment for BW participants.
logging (bool): Should we log
log (bool): Should we log
Returns:
pd.DataFrame: contributorScoresWithEnrollment The contributor scores with enrollments
"""
Expand Down Expand Up @@ -582,27 +588,22 @@ def get_contributor_state(
# users that do not have an id.
contributorScoresWithEnrollment.dropna(subset=[c.raterParticipantIdKey], inplace=True)

if logging:
print("Enrollment State")
print(
"Number of Earned In",
len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 0]),
if log:
logger.info("Enrollment State")
logger.info(
f"Number of Earned In {len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 0])}"
)
print(
"Number At Risk",
len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 1]),
logger.info(
f"Number At Risk {len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 1])}"
)
print(
"Number of Earn Out No Ack",
len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 2]),
logger.info(
f"Number of Earn Out No Ack {len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 2])}"
)
print(
"Number of Earned Out Ack",
len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 3]),
logger.info(
f"Number of Earned Out Ack {len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 3])}"
)
print(
"Number of New Users",
len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 4]),
logger.info(
f"Number of New Users {len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 4])}"
)

return contributorScoresWithEnrollment, mappedUserEnrollment
Expand All @@ -615,7 +616,7 @@ def get_contributor_scores(
lastNNotes=-1,
countNMRNotesLast: bool = False,
sinceLastEarnOut: bool = False,
logging: bool = True,
log: bool = True,
) -> pd.DataFrame:
"""
Given the outputs of the MF model, this function aggregates stats over notes and ratings. The
Expand All @@ -628,7 +629,7 @@ def get_contributor_scores(
lastNNotes (int): count over the last n notes
countNMRNotesLast (bool): count NMR notes last. Useful when you want to calculate over a limited set of CRH + CRNH notes
sinceLastEarnOut: only count notes since last Earn Out event
logging (bool): Should we log?
log (bool): Should we log?
Returns:
pd.DataFrame: contributorScores - rating + note aggregates per contributor.
"""
Expand Down Expand Up @@ -676,7 +677,7 @@ def get_contributor_scores(
]
)

if logging:
print("Number Contributor Counts: ", len(contributorCounts))
if log:
logger.info(f"Number Contributor Counts: {len(contributorCounts)}")

return contributorCounts
26 changes: 15 additions & 11 deletions sourcecode/scoring/helpfulness_scores.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from typing import Optional

from . import constants as c
Expand All @@ -6,6 +7,10 @@
import pandas as pd


logger = logging.getLogger("birdwatch.helpfulness_scores")
logger.setLevel(logging.INFO)


def author_helpfulness(
scoredNotes: pd.DataFrame,
noteInterceptKey: str,
Expand Down Expand Up @@ -199,15 +204,15 @@ def compute_general_helpfulness_scores(
def filter_ratings_by_helpfulness_scores(
ratingsForTraining: pd.DataFrame,
helpfulnessScores: pd.DataFrame,
logging: bool = True,
log: bool = True,
):
"""Filter out ratings from raters whose helpfulness scores are too low.
See https://twitter.github.io/communitynotes/contributor-scores/#filtering-ratings-based-on-helpfulness-scores.
Args:
ratingsForTraining pandas.DataFrame: unfiltered input ratings
helpfulnessScores pandas.DataFrame: helpfulness scores to use to determine which raters to filter out.
logging (bool, optional): debug output. Defaults to True.
log (bool, optional): debug output. Defaults to True.
Returns:
filtered_ratings pandas.DataFrame: same schema as input ratings, but filtered.
Expand All @@ -219,15 +224,14 @@ def filter_ratings_by_helpfulness_scores(
ratingsForTraining, on=c.raterParticipantIdKey
)

if logging:
print("Unique Raters: ", len(np.unique(ratingsForTraining[c.raterParticipantIdKey])))
print("People (Authors or Raters) With Helpfulness Scores: ", len(helpfulnessScores))
print("Raters Included Based on Helpfulness Scores: ", len(includedUsers))
print(
"Included Raters who have rated at least 1 note in the final dataset: ",
len(np.unique(ratingsHelpfulnessScoreFiltered[c.raterParticipantIdKey])),
if log:
logger.info(f"Unique Raters: {len(np.unique(ratingsForTraining[c.raterParticipantIdKey]))}")
logger.info(f"People (Authors or Raters) With Helpfulness Scores: {len(helpfulnessScores)}")
logger.info(f"Raters Included Based on Helpfulness Scores: {len(includedUsers)}")
logger.info(
f"Included Raters who have rated at least 1 note in the final dataset: {len(np.unique(ratingsHelpfulnessScoreFiltered[c.raterParticipantIdKey]))}",
)
print("Number of Ratings Used For 1st Training: ", len(ratingsForTraining))
print("Number of Ratings for Final Training: ", len(ratingsHelpfulnessScoreFiltered))
logger.info(f"Number of Ratings Used For 1st Training: {len(ratingsForTraining)}")
logger.info(f"Number of Ratings for Final Training: {len(ratingsHelpfulnessScoreFiltered)}")

return ratingsHelpfulnessScoreFiltered
Loading

0 comments on commit 2d6d6a5

Please sign in to comment.