-
Notifications
You must be signed in to change notification settings - Fork 216
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add `backfillmoderationdecision` management command (#4415)
- Loading branch information
Showing 4 changed files with 228 additions and 2 deletions.
There are no files selected for viewing
121 changes: 121 additions & 0 deletions
121
api/api/management/commands/backfillmoderationdecision.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
import argparse | ||
|
||
from django.contrib.auth import get_user_model | ||
|
||
from django_tqdm import BaseCommand | ||
|
||
from api.constants.moderation import DecisionAction | ||
from api.models import ( | ||
AudioDecision, | ||
AudioDecisionThrough, | ||
AudioReport, | ||
ImageDecision, | ||
ImageDecisionThrough, | ||
ImageReport, | ||
) | ||
from api.models.media import DMCA, MATURE_FILTERED, NO_ACTION, PENDING | ||
|
||
|
||
class Command(BaseCommand):
    """Create moderation decisions for reports that were resolved without one.

    Reports of the selected media type that have no decision and whose status
    is not ``PENDING`` each receive a new decision. The decision's action is
    derived from the report's status and reason (see ``get_action``), it is
    attributed to the given moderator, and a through-table row links it to the
    reported media object.
    """

    help = "Back-fill the moderation decision table for a given media type."
    # Number of reports handled per round of bulk queries.
    batch_size = 3

    @staticmethod
    def add_arguments(parser):
        """Register the ``--dry-run``, ``--media-type`` and ``--moderator`` options."""
        parser.add_argument(
            "--dry-run",
            help="Count reports to process, and don't do anything else.",
            # No ``type=`` here: ``BooleanOptionalAction`` consumes no value,
            # and passing ``type`` to it is deprecated since Python 3.12.
            default=True,
            action=argparse.BooleanOptionalAction,
        )
        parser.add_argument(
            "--media-type",
            help="The media type to back-fill moderation decisions.",
            type=str,
            default="image",
            choices=["image", "audio"],
        )
        parser.add_argument(
            "--moderator",
            help="The username of the moderator to attribute the decisions to.",
            type=str,
            default="opener",
        )

    def handle(self, *args, **options):
        """Back-fill decisions in batches, or only report the count on a dry run.

        Reads ``dry_run``, ``moderator`` and ``media_type`` from ``options``.
        Returns early (after logging) when this is a dry run, when there is
        nothing to process, or when the moderator username does not exist.
        """
        # Local import: keeps this block self-contained without touching the
        # module's import section.
        from django.db import transaction

        dry = options["dry_run"]
        username = options["moderator"]
        media_type = options["media_type"]

        if media_type == "audio":
            MediaReport = AudioReport
            MediaDecision = AudioDecision
            MediaDecisionThrough = AudioDecisionThrough
        else:
            MediaReport = ImageReport
            MediaDecision = ImageDecision
            MediaDecisionThrough = ImageDecisionThrough

        non_pending_reports = MediaReport.objects.filter(decision=None).exclude(
            status=PENDING
        )
        count_to_process = non_pending_reports.count()

        if dry:
            self.info(
                f"{count_to_process} {media_type} reports to back-fill. "
                "This is a dry run, exiting without making changes."
            )
            return

        if not count_to_process:
            self.info("No reports to process.")
            return

        # Ceiling division: a final, partially filled batch still counts as
        # one progress step, so the bar total matches the number of updates.
        batch_count = -(-count_to_process // self.batch_size)
        t = self.tqdm(total=batch_count)

        User = get_user_model()
        try:
            moderator = User.objects.get(username=username)
        except User.DoesNotExist:
            t.error(f"User '{username}' not found.")
            return

        # Each pass re-runs the sliced query; reports that received a decision
        # in the previous pass no longer match the ``decision=None`` filter, so
        # the loop ends once every report has been handled. ``list`` makes the
        # single evaluation of the chunk explicit.
        while reports_chunk := list(non_pending_reports[: self.batch_size]):
            # The three writes depend on one another; run them atomically so a
            # failure part-way cannot leave decisions that no report or media
            # object points to (which a rerun would then duplicate).
            with transaction.atomic():
                decisions = MediaDecision.objects.bulk_create(
                    [
                        MediaDecision(
                            action=self.get_action(report),
                            moderator=moderator,
                            notes="__backfilled_from_report_status",
                        )
                        for report in reports_chunk
                    ]
                )
                for report, decision in zip(reports_chunk, decisions):
                    report.decision = decision
                MediaReport.objects.bulk_update(reports_chunk, ["decision"])
                MediaDecisionThrough.objects.bulk_create(
                    [
                        MediaDecisionThrough(
                            media_obj=report.media_obj, decision=decision
                        )
                        for report, decision in zip(reports_chunk, decisions)
                    ]
                )
            t.update(1)

        t.info(
            self.style.SUCCESS(
                f"Created {count_to_process} {media_type} moderation decisions from existing reports."
            )
        )

    @staticmethod
    def get_action(report):
        """Return the ``DecisionAction`` matching a report's status and reason.

        ``MATURE_FILTERED`` maps to ``MARKED_SENSITIVE`` and ``NO_ACTION`` maps
        to ``REJECTED_REPORTS`` regardless of reason. Any other status is
        treated as deindexed: ``DEINDEXED_COPYRIGHT`` when the reason is
        ``DMCA``, otherwise ``DEINDEXED_SENSITIVE``.
        """
        if report.status == MATURE_FILTERED:
            return DecisionAction.MARKED_SENSITIVE

        if report.status == NO_ACTION:
            return DecisionAction.REJECTED_REPORTS

        # Cases with status = DEINDEXED
        if report.reason == DMCA:
            return DecisionAction.DEINDEXED_COPYRIGHT

        return DecisionAction.DEINDEXED_SENSITIVE  # For reasons MATURE and OTHER
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
95 changes: 95 additions & 0 deletions
95
api/test/unit/management/commands/test_backfillmoderationdecision.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
from io import StringIO | ||
|
||
from django.core.management import call_command | ||
|
||
import pytest | ||
|
||
from api.constants.moderation import DecisionAction | ||
from api.models import ( | ||
DEINDEXED, | ||
DMCA, | ||
MATURE, | ||
MATURE_FILTERED, | ||
NO_ACTION, | ||
OTHER, | ||
AudioDecision, | ||
AudioDecisionThrough, | ||
ImageDecision, | ||
ImageDecisionThrough, | ||
) | ||
from test.factory.models.audio import AudioReportFactory | ||
from test.factory.models.image import ImageReportFactory | ||
from test.factory.models.oauth2 import UserFactory | ||
|
||
|
||
def call_cmd(**options):
    """Run ``backfillmoderationdecision`` and return its captured output.

    Keyword arguments are forwarded to the command as options. The return
    value is a ``(stdout_text, stderr_text)`` tuple, which is also printed.
    """
    stdout_buffer, stderr_buffer = StringIO(), StringIO()
    call_command(
        "backfillmoderationdecision",
        **options,
        stdout=stdout_buffer,
        stderr=stderr_buffer,
    )
    captured = (stdout_buffer.getvalue(), stderr_buffer.getvalue())
    print(captured)
    return captured
|
||
|
||
def make_reports(media_type, reason: str, status: str, count: int = 1):
    """Create ``count`` reports with the given reason and status.

    Audio reports are created when ``media_type`` is ``"audio"``; any other
    value produces image reports.
    """
    factory = AudioReportFactory if media_type == "audio" else ImageReportFactory
    return factory.create_batch(count, status=status, reason=reason)
|
||
|
||
@pytest.mark.parametrize(
    ("reason", "status", "expected_action"),
    (
        (MATURE, MATURE_FILTERED, DecisionAction.MARKED_SENSITIVE),
        (DMCA, MATURE_FILTERED, DecisionAction.MARKED_SENSITIVE),
        (OTHER, MATURE_FILTERED, DecisionAction.MARKED_SENSITIVE),
        (MATURE, NO_ACTION, DecisionAction.REJECTED_REPORTS),
        (DMCA, NO_ACTION, DecisionAction.REJECTED_REPORTS),
        (OTHER, NO_ACTION, DecisionAction.REJECTED_REPORTS),
        (MATURE, DEINDEXED, DecisionAction.DEINDEXED_SENSITIVE),
        (DMCA, DEINDEXED, DecisionAction.DEINDEXED_COPYRIGHT),
        (OTHER, DEINDEXED, DecisionAction.DEINDEXED_SENSITIVE),
    ),
)
@pytest.mark.parametrize(("media_type"), ("image", "audio"))
@pytest.mark.django_db
def test_create_moderation_decision_for_reports(
    media_type, reason, status, expected_action
):
    """Check that one resolved report yields one correctly populated decision.

    Covers every reason/status combination for both media types and verifies
    the decision's action, its moderator, and the through-table link to the
    reported media object.
    """
    moderator_name = "opener"
    UserFactory.create(username=moderator_name)

    created_reports = make_reports(
        media_type=media_type, reason=reason, status=status
    )
    report = created_reports[0]

    out, _ = call_cmd(
        dry_run=False, media_type=media_type, moderator=moderator_name
    )

    # Pick the model pair that belongs to the media type under test.
    if media_type == "image":
        MediaDecision, MediaDecisionThrough = ImageDecision, ImageDecisionThrough
    else:
        MediaDecision, MediaDecisionThrough = AudioDecision, AudioDecisionThrough

    assert MediaDecision.objects.count() == 1
    assert f"Created 1 {media_type} moderation decisions from existing reports." in out

    decision = MediaDecision.objects.first()
    assert decision.media_objs.count() == 1
    assert decision.action == expected_action
    assert decision.moderator.username == moderator_name

    link = MediaDecisionThrough.objects.first()
    assert link.media_obj == report.media_obj
    assert link.decision == decision
|
||
|
||
@pytest.mark.django_db
def test_catch_user_exception():
    """Check that an unknown moderator username is reported on stderr."""
    make_reports(media_type="image", reason=MATURE, status=MATURE_FILTERED)

    captured = call_cmd(dry_run=False, moderator="nonexistent")
    stderr_text = captured[1]

    assert "User 'nonexistent' not found." in stderr_text