Skip to content

Commit

Permalink
Update pid resolution algorithm to fix seriesId inconsistencies
Browse files: browse the repository at this point in the history
Update pid resolution algorithm to fix seriesId inconsistencies
  • Loading branch information
rushirajnenuji committed Mar 31, 2022
1 parent c2f5b73 commit 696b124
Showing 1 changed file with 10 additions and 3 deletions.
13 changes: 10 additions & 3 deletions src/d1_metrics_service/d1_metrics_service/pid_resolution.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -339,7 +339,7 @@ def _fetch(url, an_id):
resMap.append(an_id)

params = {'wt':(None,'json'),
'fl':(None,'documents,resourceMap,seriesId,id'),
'fl':(None,'documents,resourceMap,seriesId,id,obsoletes'),
'rows':(None,1000)
}
params['fq'] = (None,"((id:" + quoteTerm(an_id) + ") OR (seriesId:" + quoteTerm(an_id) + "))")
Expand All @@ -349,12 +349,13 @@ def _fetch(url, an_id):
logging.debug(response.text)
resMap = _getIdsFromSolrResponse(response.text,resMap)
more_resMap_work = True
params['fl'] = (None,'documents,obsoletes')
params['fl'] = (None,'documents,resourceMap,seriesId,id,obsoletes')

while more_resMap_work:
current_length = len(resMap)
query = ") OR (".join(map(quoteTerm, resMap))
params['fq'] = (None,"id:((" + query + "))")
params['fq'] = (None,"id:((" + query + ")) OR seriesId:((" + query + "))")
response = _doPost(session, url, params, use_mm=use_mm_params)
if response.status_code == requests.codes.ok:
resMap = _getIdsFromSolrResponse(response.text, resMap)
Expand Down Expand Up @@ -777,9 +778,15 @@ def eg_getResolvePids():


def test_getResolvePids():
pids = ["oai:figshare.com:article/7438598", ]
pids = ["doi:10.15485/1603775", "ess-dive-d3dc26585e68115-20210722T195610978"]
res = getResolvePIDs(pids)
#res = getResolvePIDs(pids)

# test the differences
pprint(set(res['ess-dive-d3dc26585e68115-20210722T195610978']).difference(set(res['doi:10.15485/1603775'])))
pprint(set(res['doi:10.15485/1603775']).difference(set(res['ess-dive-d3dc26585e68115-20210722T195610978'])))

# print the results dict
pprint(res, indent=2)


Expand Down

1 comment on commit 696b124

@rushirajnenuji
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reference: #83

Please sign in to comment.