-
Notifications
You must be signed in to change notification settings - Fork 0
/
my_scholar.py
98 lines (77 loc) · 2.84 KB
/
my_scholar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 13 16:52:58 2023
@author: tjards
"""
#%% import stuff
# ------------
from scholarly import scholarly
#source: https://github.com/scholarly-python-package/scholarly
#%% parameters
# ----------
#author_name = 'Peter Travis Jardine'
# discipline labels; a discipline is referred to by its position in this list
discs = [
    'control',
    'learning',
    'swarming',
]

degrees = [
    'Phd (Queen\'s)',
]

# keyword groups used to tie a publication to a discipline: row n holds the
# keywords for discs[n]; each keyword is looked up in the publication's
# citation text and in its title
keywords_disciplines = [
    # control
    ['control', 'mpc', 'controller', 'regulator', 'pid', 'gain', 'systems'],
    # learning
    ['machine learning', 'data', 'reinforcement', 'neural', 'ai', 'learning'],
    # swarming
    ['multi', 'agent', 'swarm', 'flock', 'swarming', 'swarms', 'cooperative'],
]
#%% pull data
# -----------
def pull_data(author_name):
    """Look up *author_name* with scholarly and return the first hit, filled.

    Parameters
    ----------
    author_name
        Name handed to ``scholarly.search_author``.

    Returns
    -------
    Whatever ``scholarly.fill`` returns for the first search result.

    Raises
    ------
    StopIteration
        Propagated from ``next`` when the search iterator has no results.
    """
    # the search gives back an iterator over candidate authors
    candidates = scholarly.search_author(author_name)

    # only the first candidate is used
    first_hit = next(candidates)
    profile = scholarly.fill(first_hit)

    print('pulled data for ', author_name)
    return profile
#%% Build lists
# -------------
def _bib_text(bib, key):
    """Return ``bib[key]`` lower-cased, or '' when the entry is missing or empty."""
    value = bib.get(key)
    return str(value).lower() if value else ''


def _bib_year(bib):
    """Return ``bib['pub_year']`` as an int, or 0 when missing or not numeric."""
    try:
        return int(bib.get('pub_year'))
    except (TypeError, ValueError):
        return 0


def build_lists(author_name):
    """Build parallel per-publication lists for the author *author_name*.

    The author record is fetched with ``pull_data``; every entry of its
    ``'publications'`` list contributes one element to each returned list.

    Parameters
    ----------
    author_name
        Name forwarded to ``pull_data``.

    Returns
    -------
    pubs : list of str
        Lower-cased bib ``'citation'`` text, ``'unknown'`` when absent.
    dates : list of int
        Bib ``'pub_year'`` as an int, ``0`` when absent or not numeric.
    dates_norm : list of float
        ``(year - min + 1) / (max - min + 1)`` computed over the known years,
        so known years fall in (0, 1]. Unknown years map to ``0.0``.
    titles : list of str
        Lower-cased bib ``'title'`` text, ``'unknown'`` when absent.
    labels : list of str
        ``discs`` followed by ``'other'``.
    connects : list of list of int
        For each publication, the sorted indices (into ``labels``) of the
        disciplines whose keywords occur in its citation or title; a
        publication matching nothing is linked to ``'other'`` only.
    """
    author = pull_data(author_name)

    # index of the catch-all 'other' label appended to discs on return
    other_index = len(discs)

    pubs = []
    dates = []
    titles = []
    connects = []

    for publication in author['publications']:
        bib = publication.get('bib') or {}

        # missing fields fall back to placeholders instead of raising KeyError
        citation = _bib_text(bib, 'citation')
        title = _bib_text(bib, 'title')
        pubs.append(citation or 'unknown')
        titles.append(title or 'unknown')
        dates.append(_bib_year(bib))

        # NOTE: keywords are matched as plain substrings, so a short keyword
        # also matches inside longer words
        matched = {
            disc_index
            for disc_index, (_, keywords) in enumerate(zip(discs, keywords_disciplines))
            if any(word in citation or word in title for word in keywords)
        }
        # sorted() keeps the index order deterministic
        connects.append(sorted(matched) if matched else [other_index])

    # normalize over the known years only: a 0 placeholder would otherwise
    # stretch the range and squash every real year towards 1
    known_years = [year for year in dates if year > 0]
    if known_years:
        date_min = min(known_years)
        span = max(known_years) - date_min + 1
        dates_norm = [
            (year - date_min + 1) / span if year > 0 else 0.0
            for year in dates
        ]
    else:
        # no publications, or none with a usable year
        dates_norm = [0.0] * len(dates)

    return pubs, dates, dates_norm, titles, discs + ['other'], connects