diff --git a/dcicutils/contribution_utils.py b/dcicutils/contribution_utils.py index ac51a20a3..1b0229610 100644 --- a/dcicutils/contribution_utils.py +++ b/dcicutils/contribution_utils.py @@ -4,6 +4,7 @@ import json import os import re +import warnings from collections import defaultdict from dcicutils.diff_utils import DiffManager @@ -33,10 +34,14 @@ def git_commits(cls, repo_name) -> List[git.Commit]: for commit in repo.iter_commits(): yield cls.json_for_commit(commit) + @classmethod + def author_name(cls, actor: git.Actor) -> str: + return actor.name or actor.email.split('@')[0] + @classmethod def json_for_actor(cls, actor: git.Actor) -> Dict: return { - "name": actor.name, + "name": cls.author_name(actor), "email": actor.email, } @@ -55,7 +60,7 @@ class Contributor: @classmethod def create(cls, *, author: git.Actor) -> 'Contributor': - return Contributor(email=author.email, name=author.name) + return Contributor(email=author.email, name=GitAnalysis.author_name(author)) def __init__(self, *, email: Optional[str] = None, name: Optional[str] = None, emails: Optional[Set[str]] = None, names: Optional[Set[str]] = None, @@ -480,11 +485,12 @@ def notice_author(*, author: git.Actor, date: datetime.datetime): # PRINT(f"Post-fork contribution from {author.email} ({date})") post_fork_contributors_seen[author.email].append(date) self.notice_reference_time(key=author.email, timestamp=date, timestamps=self.email_timestamps) - self.notice_reference_time(key=author.name, timestamp=date, timestamps=self.name_timestamps) + self.notice_reference_time(key=GitAnalysis.author_name(author), timestamp=date, + timestamps=self.name_timestamps) contributor_by_email = contributors_by_email.get(author.email) if contributor_by_email: # already exists, so update it - contributor_by_email.notice_mention_as(email=author.email, name=author.name) + contributor_by_email.notice_mention_as(email=author.email, name=GitAnalysis.author_name(author)) else: # need to create it new contributor_by_email = Contributor.create(author=author) contributors_by_email[author.email] = contributor_by_email @@ -551,6 +557,7 @@ def traverse(cls, contributors_by_name=contributors_by_name, seen=seen) for email in list(cursor.emails): contributor = contributors_by_email.get(email) - if contributor and contributor not in seen: + if contributor and contributor not in seen: # pragma: no cover - shouldn't happen, included 'just in case' + warnings.warn(f"Unexpected stray email seen: {email}") cls.traverse(root=root, cursor=contributor, contributors_by_email=contributors_by_email, contributors_by_name=contributors_by_name, seen=seen) diff --git a/test/test_contribution_utils.py b/test/test_contribution_utils.py index 062b97ef1..1095480e6 100644 --- a/test/test_contribution_utils.py +++ b/test/test_contribution_utils.py @@ -736,17 +736,27 @@ def test_contributions_init_with_fork_and_no_cache(): mocked_foo_commits: List[Dict] = [ { - "hexsha": "bbbb", + "hexsha": "ffff", + "committed_datetime": "2020-01-05T12:34:56-05:00", + "author": {"name": None, "email": "sal@foo"}, + "message": "something else" + }, + { + "hexsha": "eeee", + "committed_datetime": "2020-01-05T12:34:56-05:00", + "author": {"name": "Sal", "email": "ssmith@foo"}, + "message": "something else" + }, + { + "hexsha": "dddd", "committed_datetime": "2020-01-04T12:34:56-05:00", - "author": {"name": "Sally", "email": "sally.smith@foo"}, - "co_authors": [{"name": "William Simmons", "email": "bill@someplace"}], + "author": {"name": "Sally Smith", "email": "ssmith@foo"}, "message": "something else" }, { - "hexsha": "bbbb", + "hexsha": "cccc", "committed_datetime": "2020-01-03T12:34:56-05:00", - "author": {"name": "Sally Smith", "email": "ssmith@foo"}, - "co_authors": [{"name": "William Simmons", "email": "bill@someplace"}], + "author": {"name": "Sally Smith", "email": "sally.smith@foo"}, "message": "something else" }, { @@ -787,12 +797,15 @@ def test_contributions_init_with_fork_and_no_cache(): } assert contributions.contributor_values_as_dicts(contributions.contributors_by_email) == { - "sally.smith@foo": {"names": ["Sally", "Sally Smith"], "emails": ["sally.smith@foo", "ssmith@foo"]}, - "ssmith@foo": {"names": ["Sally", "Sally Smith"], "emails": ["sally.smith@foo", "ssmith@foo"]}, + "sal@foo": {"names": ["sal"], "emails": ["sal@foo"]}, + "sally.smith@foo": {"names": ["Sal", "Sally", "Sally Smith"], + "emails": ["sally.smith@foo", "ssmith@foo"]}, + "ssmith@foo": {"names": ["Sal", "Sally", "Sally Smith"], "emails": ["sally.smith@foo", "ssmith@foo"]}, "bill@someplace": {"names": ["William Simmons"], "emails": ["bill@someplace"]}, } assert contributions.contributor_values_as_dicts(contributions.contributors_by_name) == { - "Sally Smith": {"names": ["Sally", "Sally Smith"], "emails": ["sally.smith@foo", "ssmith@foo"]}, + "sal": {"names": ["sal"], "emails": ["sal@foo"]}, + "Sally Smith": {"names": ["Sal", "Sally", "Sally Smith"], "emails": ["sally.smith@foo", "ssmith@foo"]}, "William Simmons": {"names": ["William Simmons"], "emails": ["bill@someplace"]}, }