forked from darvasd/upload-to-zenodo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_author_lists.py
61 lines (50 loc) · 2.18 KB
/
create_author_lists.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import pandas as pd
import re
from csv import DictWriter
import json
import os
# Spreadsheet headers of the country workbook mapped to the short column
# names used by the rest of the script.
COUNTRY_COLUMNS = {
    'Name of the Country': 'country',
    'Author List (draft, to be checked)': 'authors',
}
COUNTRY_WORKBOOK = 'Starter Kit - List of Countries.xlsx'

AUTHOR_COLUMNS = ['firstname', 'lastname', 'orcid', 'institution']
AUTHOR_WORKBOOK = 'authors.xlsx'

OUTPUT_PATH = os.path.join('data', 'data.csv')
CSV_FIELDNAMES = ["FILENAME", "TITLE", "ABSTRACT", "AUTHORS", "KEYWORDS"]

# Square-bracketed markers holding one to three characters, e.g. "[1]".
# Compiled once because it is applied to every country's author list.
BRACKET_MARKER = re.compile(r"\[.{1,3}\]")

# Keywords attached to every record; the country name is appended per row.
BASE_KEYWORDS = ['energy', 'OSeMOSYS', '#CCG', 'clicSAND',
                 'energy system modelling',
                 'GNUMathProg', 'GLPK', 'linear programming']


def load_country_authors(path=COUNTRY_WORKBOOK):
    """Read the country workbook into a ``{country: author list}`` dict.

    Only the two columns named in ``COUNTRY_COLUMNS`` are read, and rows with
    a missing value in either of them are dropped.
    """
    frame = pd.read_excel(path, usecols=list(COUNTRY_COLUMNS),
                          engine='openpyxl').dropna()
    frame = frame.rename(columns=COUNTRY_COLUMNS)
    return frame.set_index('country').to_dict()['authors']


def load_author_details(path=AUTHOR_WORKBOOK):
    """Read the author workbook, indexed by ``"<firstname> <lastname>"``.

    ``na_filter=False`` keeps empty cells as empty strings, which is what
    ``build_author_record`` tests for when deciding which fields to emit.
    """
    frame = pd.read_excel(path, engine='openpyxl',
                          usecols=AUTHOR_COLUMNS, na_filter=False)
    frame['lookup'] = frame['firstname'] + " " + frame['lastname']
    return frame.set_index('lookup')


def parse_author_names(author_list):
    """Split a raw comma-separated author cell into clean names.

    Bracketed markers matched by ``BRACKET_MARKER`` are removed and each name
    is stripped of surrounding whitespace. Empty entries (produced by a
    trailing or doubled comma) are skipped so they are never looked up.

    Raises:
        TypeError: if ``author_list`` is not a string.
    """
    without_markers = BRACKET_MARKER.sub("", author_list)
    candidates = (name.strip() for name in without_markers.split(","))
    return [name for name in candidates if name]


def find_author(name, country, data):
    """Return the row of ``data`` whose index label equals ``name``.

    Raises:
        KeyError: if ``name`` is not present in the index of ``data``.
        ValueError: if ``name`` is present more than once, because the
            lookup would then be ambiguous.
    """
    try:
        row = data.loc[name]
    except KeyError as err:
        raise KeyError(
            "author {!r} listed for {!r} was not found in the author "
            "details".format(name, country)) from err
    # .loc returns a DataFrame rather than a single row when the label is
    # duplicated in the index.
    if isinstance(row, pd.DataFrame):
        raise ValueError(
            "author {!r} listed for {!r} appears more than once in the "
            "author details".format(name, country))
    return row


def build_author_record(row):
    """Build the author dict for one row of the author details.

    ``name`` is always present as ``"<lastname>, <firstname>"``;
    ``affiliation`` and ``orcid`` are included only when the corresponding
    cell is not an empty string.
    """
    record = {"name": row['lastname'] + ", " + row['firstname']}
    if row['institution'] != '':
        record["affiliation"] = row['institution']
    if row['orcid'] != '':
        record["orcid"] = row['orcid']
    return record


def build_csv_rows(lookup, data):
    """Build one CSV row dict per country.

    Args:
        lookup: mapping of country name to its raw author list string.
        data: author details indexed by ``"<firstname> <lastname>"``.

    Returns:
        A list of dicts keyed by ``CSV_FIELDNAMES``; ``AUTHORS`` and
        ``KEYWORDS`` hold JSON-encoded strings.

    Raises:
        TypeError: if an author list is not a string.
        KeyError: if a listed author is missing from ``data``.
        ValueError: if a listed author is duplicated in ``data``.
    """
    rows = []
    for country, author_list in lookup.items():
        try:
            names = parse_author_names(author_list)
        except TypeError as err:
            raise TypeError(
                "author list for {!r} is not text: {!r}".format(
                    country, author_list)) from err
        authors = [build_author_record(find_author(name, country, data))
                   for name in names]
        countryname = country.strip().title()
        rows.append({
            'FILENAME': "{}.json".format(countryname),
            'TITLE': "CCG Starter Data Kit: {}".format(countryname),
            'ABSTRACT': "<p>A starter data kit for {}</p>".format(countryname),
            'AUTHORS': json.dumps(authors),
            'KEYWORDS': json.dumps(BASE_KEYWORDS + [countryname]),
        })
    return rows


def write_csv(rows, path=OUTPUT_PATH):
    """Write ``rows`` to ``path`` as CSV with a header line.

    The parent directory is created when missing. The file is opened with
    ``newline=''`` so the ``"\\n"`` line terminator is written unchanged on
    every platform, and as UTF-8 so non-ASCII country names do not depend on
    the locale's default encoding.
    """
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = DictWriter(csvfile, fieldnames=CSV_FIELDNAMES,
                            lineterminator="\n")
        writer.writeheader()
        writer.writerows(rows)


def main():
    """Generate the CSV from the two workbooks.

    All paths are relative, so they resolve against the current working
    directory.
    """
    lookup = load_country_authors()
    data = load_author_details()
    write_csv(build_csv_rows(lookup, data))


if __name__ == "__main__":
    main()