-
Notifications
You must be signed in to change notification settings - Fork 1
/
filter_arxiv.py
64 lines (49 loc) · 1.91 KB
/
filter_arxiv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import re

import bibtexparser
# Split the arXiv bibliography into two files: preprints that have not (yet)
# appeared in the list of published papers, and preprints whose DOI is already
# present in that list.

# Prefixes that may precede the bare DOI inside a ``doi`` field.
_DOI_PREFIXES = (
    'https://doi.org/',
    'http://doi.org/',
    'https://dx.doi.org/',
    'http://dx.doi.org/',
    'doi:',
)

# An ampersand that is not already escaped with a backslash.
_UNESCAPED_AMPERSAND = re.compile(r'(?<!\\)&')


def normalize_doi(doi):
    """Return *doi* lower-cased, stripped, and without a resolver prefix.

    This lets ``10.1/X``, `` 10.1/x `` and ``https://doi.org/10.1/x`` compare
    equal when matching preprints against published papers.
    """
    doi = doi.strip().lower()
    for prefix in _DOI_PREFIXES:
        if doi.startswith(prefix):
            return doi[len(prefix):].strip()
    return doi


def collect_dois(entries):
    """Return the set of normalized, non-empty DOIs found in *entries*.

    *entries* is an iterable of dict-like BibTeX entries; entries without a
    ``doi`` key (or with an empty one) are ignored.
    """
    dois = set()
    for entry in entries:
        doi = normalize_doi(entry.get('doi', ''))
        if doi:
            dois.add(doi)
    return dois


def split_by_publication(entries, published_dois):
    """Partition *entries* into ``(unpublished, published)`` lists.

    An entry counts as published only when it has a non-empty DOI whose
    normalized form is in *published_dois*. The input order is preserved
    within each returned list.
    """
    unpublished = []
    published = []
    for entry in entries:
        doi = normalize_doi(entry.get('doi', ''))
        if doi and doi in published_dois:
            published.append(entry)
        else:
            unpublished.append(entry)
    return unpublished, published


def escape_ampersands(text):
    """Escape every bare ``&`` in *text* as ``\\&`` for LaTeX.

    Ampersands that are already preceded by a backslash are left alone, so
    the operation is idempotent and does not produce ``\\\\&``.
    """
    # In an ``re`` replacement template ``\\`` yields one literal backslash.
    return _UNESCAPED_AMPERSAND.sub(r'\\&', text)


def load_bib(path):
    """Parse the BibTeX file at *path* and return the resulting database."""
    parser = bibtexparser.bparser.BibTexParser(common_strings=True)
    with open(path, encoding="utf8") as bibtex_file:
        return bibtexparser.loads(bibtex_file.read(), parser=parser)


def make_writer():
    """Return a BibTeX writer configured for the generated files."""
    writer = bibtexparser.bwriter.BibTexWriter()
    writer.indent = '\t'  # indent fields with a tab
    # bibtexparser expects a tuple of field names here; a bare string would
    # be iterated character by character and no year ordering would happen.
    writer.order_entries_by = ('year',)
    writer.align_values = True
    return writer


def write_bib(path, entries):
    """Write *entries* to *path* as an auto-generated BibTeX file."""
    database = bibtexparser.bibdatabase.BibDatabase()
    database.entries.extend(entries)
    with open(path, 'w', encoding='utf8') as bibfile:
        bibfile.write('# AUTOGENERATED\n# \n\n')
        bibfile.write(escape_ampersands(make_writer().write(database)))


def main():
    """Read ``pure.bib`` and ``arxiv.bib`` and write the two filtered files.

    ``arxiv_nopub.bib`` receives the arXiv entries with no matching DOI in
    ``pure.bib``; ``arxiv_pub.bib`` receives the ones that do match.
    """
    # DOIs of published papers, kept in a set for constant-time lookups.
    published_dois = collect_dois(load_bib('pure.bib').entries)
    arxiv_entries = load_bib('./arxiv.bib').entries

    print('================================================================')
    unpublished, published = split_by_publication(arxiv_entries, published_dois)

    write_bib('arxiv_nopub.bib', unpublished)
    write_bib('arxiv_pub.bib', published)
    print('Done')


if __name__ == '__main__':
    main()