From 696b1247dd0cb5487336cf8e1995f373d83f0d37 Mon Sep 17 00:00:00 2001 From: Rushiraj Nenuji <19696935+rushirajnenuji@users.noreply.github.com> Date: Thu, 31 Mar 2022 09:12:01 -0700 Subject: [PATCH] Update pid resolution algorithm to fix seriesId inconsistencies Update pid resolution algorithm to fix seriesId inconsistencies --- .../d1_metrics_service/pid_resolution.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/d1_metrics_service/d1_metrics_service/pid_resolution.py b/src/d1_metrics_service/d1_metrics_service/pid_resolution.py index 465f6f9..d978eab 100644 --- a/src/d1_metrics_service/d1_metrics_service/pid_resolution.py +++ b/src/d1_metrics_service/d1_metrics_service/pid_resolution.py @@ -339,7 +339,7 @@ def _fetch(url, an_id): resMap.append(an_id) params = {'wt':(None,'json'), - 'fl':(None,'documents,resourceMap,seriesId,id'), + 'fl':(None,'documents,resourceMap,seriesId,id,obsoletes'), 'rows':(None,1000) } params['fq'] = (None,"((id:" + quoteTerm(an_id) + ") OR (seriesId:" + quoteTerm(an_id) + "))") @@ -349,12 +349,13 @@ def _fetch(url, an_id): logging.debug(response.text) resMap = _getIdsFromSolrResponse(response.text,resMap) more_resMap_work = True - params['fl'] = (None,'documents,obsoletes') + params['fl'] = (None,'documents,resourceMap,seriesId,id,obsoletes') while more_resMap_work: current_length = len(resMap) query = ") OR (".join(map(quoteTerm, resMap)) params['fq'] = (None,"id:((" + query + "))") + params['fq'] = (None,"id:((" + query + ")) OR seriesId:((" + query + "))") response = _doPost(session, url, params, use_mm=use_mm_params) if response.status_code == requests.codes.ok: resMap = _getIdsFromSolrResponse(response.text, resMap) @@ -777,9 +778,15 @@ def eg_getResolvePids(): def test_getResolvePids(): - pids = ["oai:figshare.com:article/7438598", ] + pids = ["doi:10.15485/1603775", "ess-dive-d3dc26585e68115-20210722T195610978"] res = getResolvePIDs(pids) #res = getResolvePIDs(pids) + + # test the differences + pprint(set(res['ess-dive-d3dc26585e68115-20210722T195610978']).difference(set(res['doi:10.15485/1603775']))) + pprint(set(res['doi:10.15485/1603775']).difference(set(res['ess-dive-d3dc26585e68115-20210722T195610978']))) + + # print the results dict pprint(res, indent=2)