Skip to content

Commit

Permalink
Manual Curation Usability Updates (#58)
Browse files Browse the repository at this point in the history
* Added instance checking to the 'authors' and 'alternate_bibcode' fields in maintenance_manual_curation().

* Updated comments and error messages

* Moved datetime bugfix to this branch. Added fix to remove empty associated works from the forwarded message.

* Fixed typo in forward.py

* Added error message capture to maintenance_manual_curation. Added error message display to maintenance_show_metadata.

* Removed superfluous 'and key' from db.py

* Removed update_citation_target_curator_message from db.py as it is no longer needed.

* Resolves outstanding issues about `except` from review.
  • Loading branch information
tjacovich authored Jul 12, 2022
1 parent 2b3ff78 commit 6fbf65e
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 18 deletions.
3 changes: 1 addition & 2 deletions ADSCitationCapture/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ def update_citation_target_metadata(app, content, raw_metadata, parsed_metadata,
metadata_updated = _update_citation_target_metadata_session(session, content, raw_metadata, parsed_metadata, curated_metadata, status=status, bibcode=bibcode, associated=associated)
return metadata_updated


def store_citation(app, citation_change, content_type, raw_metadata, parsed_metadata, status):
"""
Stores a new citation in the DB
Expand Down Expand Up @@ -319,7 +318,7 @@ def generate_modified_metadata(parsed_metadata, curated_entry):
bad_keys=[]
if not modified_metadata.get('alternate_bibcode', None): modified_metadata.update({'alternate_bibcode':[]})
for key in curated_entry.keys():
if key not in ['bibcode', 'doi']:
if key not in ['bibcode', 'doi', 'error']:
if key in modified_metadata.keys():
try:
modified_metadata[key] = curated_entry[key]
Expand Down
2 changes: 2 additions & 0 deletions ADSCitationCapture/doi.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,11 @@ def renormalize_author_names(authors):
to renormalize author names from curated metadata.
"""
normalized_author_names = []

for name in authors:
norm_author_name = dc.author_names._normalize(name, collaborations_params=dc.author_collaborations_params)
normalized_author_names.append(norm_author_name)

return normalized_author_names

def _parse_metadata_zenodo_doi(raw_metadata):
Expand Down
19 changes: 16 additions & 3 deletions ADSCitationCapture/forward.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,19 @@ def build_record(app, citation_change, parsed_metadata, citations, db_versions,
normalized_authors = parsed_metadata.get('normalized_authors', [])
affiliations = parsed_metadata.get('affiliations', ['-']*len(authors))
pubdate = parsed_metadata.get('pubdate', get_date().strftime("%Y-%m-%d"))
try:
solr_date=(datetime.datetime.strptime(pubdate, "%Y-%m-%d")+datetime.timedelta(minutes=30)).strftime('%Y-%m-%dT%H:%M:%S.%fZ')
except ValueError:
try:
#In the event only a year is specified, the date is assumed to be January 1st of the given year.
logger.warn("Publication date does not conform to Y-m-d format. Assuming only year is specified.")
pubdate = pubdate+"-01"+"-01"
solr_date=(datetime.datetime.strptime(pubdate, "%Y-%m-%d")+datetime.timedelta(minutes=30)).strftime('%Y-%m-%dT%H:%M:%S.%fZ')
except ValueError:
#If above fails, just set it to the current date. Running maintenance_metadata could fix the bad publication date in the future if it is updated upstream.
logger.warn("Cannot parse publication date. Setting to current datetime.")
solr_date=date2solrstamp(entry_date)

source = parsed_metadata.get('source', "Unknown")
version = parsed_metadata.get('version', "")
doctype = parsed_metadata.get('doctype', "software")
Expand Down Expand Up @@ -84,7 +97,7 @@ def build_record(app, citation_change, parsed_metadata, citations, db_versions,
'database': ['general', 'astronomy'],
'entry_date': date2solrstamp(entry_date), # date2solrstamp(get_date()),
'year': year,
'date': (datetime.datetime.strptime(pubdate, "%Y-%m-%d")+datetime.timedelta(minutes=30)).strftime('%Y-%m-%dT%H:%M:%S.%fZ'), # TODO: Why this date has to be 30 minutes in advance? This is based on ADSImportPipeline SolrAdapter
'date': solr_date, # TODO: Why this date has to be 30 minutes in advance? This is based on ADSImportPipeline SolrAdapter
'doctype': doctype,
'doctype_facet_hier': ["0/Non-Article", "1/Non-Article/Software"],
'doi': [doi],
Expand Down Expand Up @@ -129,7 +142,7 @@ def build_record(app, citation_change, parsed_metadata, citations, db_versions,
record_dict['status'] = status
else:
status = 0 # active
if db_versions not in [{"":""}, None]:
if db_versions not in [{"":""}, {}, None]:
record_dict['property'].append('ASSOCIATED')
if is_release:
record_dict['property'].append('RELEASE')
Expand Down Expand Up @@ -160,7 +173,7 @@ def _build_nonbib_record(app, citation_change, record, db_versions, status):
'simbad_objects': [],
'total_link_counts': 0 # Only used for DATA and not for ESOURCES
}
if db_versions not in [{"":""}, None]:
if db_versions not in [{"":""}, {}, None]:
nonbib_record_dict['data_links_rows'].append({'link_type': 'ASSOCIATED', 'link_sub_type': '',
'url': db_versions.values(), 'title': db_versions.keys(), 'item_count':0})
nonbib_record = NonBibRecord(**nonbib_record_dict)
Expand Down
50 changes: 37 additions & 13 deletions ADSCitationCapture/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,12 +622,18 @@ def task_maintenance_curation(dois, bibcodes, curated_entries, reset=False):
try:
if not reset:
if 'authors' in curated_entry.keys():
curated_entry['normalized_authors'] = doi.renormalize_author_names(curated_entry.get('authors', None))
#checks to make sure authors are in a list. Errors out if not.
if isinstance(curated_entry.get('authors', []), list):
curated_entry['normalized_authors'] = doi.renormalize_author_names(curated_entry.get('authors', None))
else:
logger.error("'author' key is not a list of authors. Stopping.")
err = "'authors' is not a valid list of strings"
raise TypeError(err)
#only check old metadata if we are adding updates, otherwise ignore.
if curated_entry != registered_record.get('curated_metadata'):
for key in registered_record['curated_metadata'].keys():
#first apply any previous edits to metadata that are not overwritten by new metadata.
if key not in curated_entry.keys():
if key != "error" and key not in curated_entry.keys():
curated_entry[key] = registered_record['curated_metadata'][key]
else:
logger.warn("Supplied metadata is identical to previously added metadata. No updates will occur.")
Expand All @@ -645,8 +651,15 @@ def task_maintenance_curation(dois, bibcodes, curated_entries, reset=False):
parsed_metadata['alternate_bibcode'] = registered_record.get('alternate_bibcode', [])
#checks for provided alt bibcodes from manual curation
if 'alternate_bibcode' in curated_entry.keys():
alternate_bibcode = list(set(alternate_bibcode+curated_entry['alternate_bibcode']))
logger.debug('alternate bibcodes are {}'.format(alternate_bibcode))
#checks to make sure alternate_bibcodes are in a list. Errors out if not.
if isinstance(curated_entry.get('alternate_bibcode', []), list):
alternate_bibcode = list(set(alternate_bibcode+curated_entry['alternate_bibcode']))
logger.debug('alternate bibcodes are {}'.format(alternate_bibcode))
else:
logger.error("'alternate_bibcodes' key is not a list of alternate_bibcodes. Stopping.")
err = "'alternate_bibcodes' is not a valid list of bibcodes"
raise TypeError(err)

#checks to make sure the main bibcode is not in the alt bibcodes
try:
alternate_bibcode.remove(modified_metadata.get('bibcode'))
Expand All @@ -669,7 +682,7 @@ def task_maintenance_curation(dois, bibcodes, curated_entries, reset=False):
#updates curated entry alt bibcodes only if a new bibcode is generated due to manual curation
curated_entry['alternate_bibcode'] = alternate_bibcode
#marks bibcode as replaced
bibcode_replaced = {'previous': registered_record['bibcode'], 'new': parsed_metadata['bibcode'] }
bibcode_replaced = {'previous': registered_record['bibcode'], 'new': new_bibcode}
#sets modified metadata alt bibcodes to match the full list of alt bibcodes.
modified_metadata['alternate_bibcode'] = alternate_bibcode

Expand Down Expand Up @@ -731,7 +744,11 @@ def task_maintenance_curation(dois, bibcodes, curated_entries, reset=False):
else:
logger.warn("Curated metadata did not result in a change to recorded metadata for {}.".format(registered_record.get('content')))
except Exception as e:
logger.error("task_maintenance_curation Failed to update metadata for {} with Exception: {}. Please check that the bibcode or doi matches a target record.".format(curated_entry, e))
err = "task_maintenance_curation Failed to update metadata for {} with Exception: {}. Please check the input data and try again.".format(curated_entry, e)
err_dict = registered_record.get('curated_metadata', {})
err_dict['error'] = err
db.update_citation_target_curator_message(app, registered_record['content'], err_dict)
logger.exception(err)
raise

def maintenance_show_metadata(curated_entries):
Expand All @@ -754,9 +771,13 @@ def maintenance_show_metadata(curated_entries):
timestamp=datetime.now()
)
try:
parsed_metadata = db.get_citation_target_metadata(app, custom_citation_change.content).get('parsed', None)
if parsed_metadata:
print(json.dumps(parsed_metadata))
metadata = db.get_citation_target_metadata(app, custom_citation_change.content)
parsed = metadata.get('parsed', None)
curated = metadata.get('curated', None)
if parsed:
print(json.dumps(parsed))
if "error" in curated.keys():
print("\n The most recent attempt to curate metadata failed with the following error: {}".format(curated.get("error", "")))

except Exception as e:
msg = "Failed to load metadata for citation {}. Please confirm information is correct and citation target is in database.".format(curated_entry)
Expand All @@ -776,11 +797,14 @@ def maintenance_show_metadata(curated_entries):
status=adsmsg.Status.updated,
timestamp=datetime.now()
)
parsed_metadata = db.get_citation_target_metadata(app, custom_citation_change.content).get('parsed', None)
try:
parsed_metadata = db.get_citation_target_metadata(app, custom_citation_change.content).get('parsed', None)
if parsed_metadata:
print(json.dumps(parsed_metadata))
metadata = db.get_citation_target_metadata(app, custom_citation_change.content)
parsed = metadata.get('parsed', None)
curated = metadata.get('curated', None)
if parsed:
print(json.dumps(parsed))
if "error" in curated.keys():
print("\n The most recent attempt to curate metadata failed with the following error: {}".format(curated.get("error", "")))

except Exception as e:
msg = "Failed to load metadata for citation {}. Please confirm information is correct and citation target is in database.".format(curated_entry)
Expand Down

0 comments on commit 6fbf65e

Please sign in to comment.