-
Notifications
You must be signed in to change notification settings - Fork 0
/
scan_s2orc_meta.py
40 lines (38 loc) · 1.53 KB
/
scan_s2orc_meta.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from module.query_metadata import GettingPMID
import argparse
def build_parser():
    """Create the command-line parser for the S2ORC metadata scan.

    Returns:
        argparse.ArgumentParser: parser exposing three required options
        (``-e/--email``, ``-a/--archives_path``, ``-o/--output_file``)
        and one optional option (``-q/--entrez_query``).
    """
    parser = argparse.ArgumentParser(
        description=("Get PMIDs from Entrez and check\n"
                     "them for S2ORC availability. Extract relevant\n"
                     "articles from the S2ORC meta archives"))
    # metavar='' replaces argparse's default upper-case placeholder
    # (e.g. EMAIL) with an empty string in the usage/help output.
    parser.add_argument('-e',
                        '--email',
                        metavar='',
                        required=True,
                        help='Provide your email address (for Entrez)')
    parser.add_argument('-a',
                        '--archives_path',
                        metavar='',
                        required=True,
                        help='Provide PATH to meta S2ORC archives')
    parser.add_argument('-o',
                        '--output_file',
                        metavar='',
                        required=True,
                        help='Provide jsonl output FILE PATH')
    # Optional: args.entrez_query is None when the flag is omitted.
    parser.add_argument('-q',
                        '--entrez_query',
                        metavar='',
                        required=False,
                        help='Provide Entrez search query')
    return parser


def main(argv=None):
    """Parse the command line and run the PMID / S2ORC pipeline.

    Args:
        argv: Optional list of argument strings. When None, argparse
            reads the arguments from ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)

    # Init GettingPMID class
    # NOTE(review): entrez_query may be None here; presumably GettingPMID
    # supplies its own default query -- confirm in module.query_metadata.
    entrez = GettingPMID(args.email, args.archives_path, args.output_file,
                         args.entrez_query)

    # Get PMIDs from Pubmed
    # NOTE(review): get_pmid is read as an attribute, not called. This is
    # only correct if it is a property on GettingPMID -- confirm.
    pmids = entrez.get_pmid

    # Search for articles of interest in S2ORC meta archives
    interest = entrez.get_articles(pmids)

    # The return value is not used by this script, so it is not bound.
    entrez.parallel_process(interest)


if __name__ == "__main__":
    main()