-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5e25140
commit cf7b59f
Showing
30 changed files
with
935 additions
and
166 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
#!/usr/bin/env python3 | ||
|
||
''' | ||
This script uses the Semantic Scholar bulk search API to fetch the most cited and | ||
most recent articles for each category, then writes a plot and a markdown page per category. | ||
''' | ||
|
||
import requests | ||
import time | ||
from jinja2 import Environment, FileSystemLoader | ||
import markdownify | ||
import utils | ||
|
||
# Define the paper search endpoint URL
URL = 'https://api.semanticscholar.org/graph/v1/paper/search/bulk'
# Base query parameters shared by every search request. The per-request
# 'query' and 'sort' entries are added by fetch_articles on a copy of this dict.
BASE_PARAMS = {
    # 'limit': 5,
    'publicationTypes': 'JournalArticle',
    # 'year': '2020-',
    # Joined from a tuple rather than written as one backslash-continued
    # string literal, so source indentation can never leak into the value.
    'fields': ','.join((
        'paperId', 'url', 'journal',
        'title', 'publicationTypes', 'publicationDate',
        'citationCount', 'publicationVenue',
    )),
    # 'sort': 'citationCount:desc',
    # Continuation token for paging; None on the first request.
    'token': None,
}
# Number of articles to collect per query and to show per rendered list
N = 10
# Per-category results, keyed by category name
DIC = {}
|
||
def _get_search_page(query_params, request_timeout=60, short_wait=1,
                     long_wait=310, max_quick_retries=10) -> dict:
    """
    Request one page from the paper search endpoint and return its JSON body.
    Args:
        query_params (dict): query parameters to send with the request
        request_timeout (float): seconds to wait for the server before giving up
        short_wait (float): pause in seconds after each of the first rate-limit hits
        long_wait (float): pause in seconds once the rate limit keeps being hit
        max_quick_retries (int): number of rate-limit hits answered with short_wait
    Returns:
        dict: decoded JSON response
    Raises:
        requests.HTTPError: if the status code is neither 200 nor 429
        requests.RequestException: on network errors or timeouts
    """
    rate_limit_hits = 0
    while True:
        # A finite timeout keeps a stalled connection from hanging the script forever
        search_response = requests.get(URL, params=query_params, timeout=request_timeout)
        status_code = search_response.status_code
        print('status code', status_code)
        if status_code == 200:
            return search_response.json()
        if status_code == 429:
            # Rate limited: retry after a pause, backing off hard if it persists
            rate_limit_hits += 1
            if rate_limit_hits > max_quick_retries:
                print('Too many requests!')
                print('Sleeping for 5 minutes and 10 seconds....')
                time.sleep(long_wait)
            else:
                time.sleep(short_wait)
            continue
        # Any other status will not fix itself by re-sending the same request
        # immediately, so fail loudly instead of looping without a pause.
        search_response.raise_for_status()
        raise requests.HTTPError(f'Unexpected status code {status_code}',
                                 response=search_response)


def fetch_articles(search_query,
                   sort='citationCount:desc') -> list:
    """
    Return the articles matching a query, in the requested sort order.

    Pages are requested until at least N articles have been collected or the
    response carries no continuation token. Whole pages are appended, so the
    returned list can hold more than N articles.
    Args:
        search_query (str): query to search for
        sort (str): sort order sent to the API (default: most cited first)
    Returns:
        list: list of articles (the items under the 'data' key of each response)
    Raises:
        requests.HTTPError: if the API answers with a status other than 200 or 429
        requests.RequestException: on network errors or timeouts
    """
    query_params = BASE_PARAMS.copy()
    query_params['query'] = search_query
    query_params['sort'] = sort

    fetched_data = []
    while True:
        search_response_json = _get_search_page(query_params)
        # Tolerate a missing or null 'data' entry instead of raising KeyError/TypeError
        fetched_data += search_response_json.get('data') or []
        next_token = search_response_json.get('token')
        # End the loop if we have fetched enough data
        # or if there is no more data to fetch
        if len(fetched_data) >= N or next_token is None:
            break
        # Update the token to fetch the next page of data
        query_params['token'] = next_token
    return fetched_data
|
||
def create_template(template_file, category_name) -> str:
    """
    Render the page of one category and convert it to markdown
    Args:
        template_file (str): name of the template file inside ../../templates/
        category_name (str): key of the category in DIC
    Returns:
        str: markdown content
    """
    # Load the template from the templates directory
    loader = FileSystemLoader("../../templates/")
    page_template = Environment(loader=loader).get_template(template_file)
    # Collect the values handed to the template; only the first N
    # articles of each list are shown
    category = DIC[category_name]
    context = {
        'most_cited_articles': category['most_cited_articles'][0:N],
        'most_recent_articles': category['most_recent_articles'][0:N],
        'category_name': category_name,
        'title': category['title'],
        'query': category['query'],
        'hide_nav': "---\nhide:\n\t- navigation---\n",
    }
    rendered = page_template.render(**context)
    # Convert the rendered template to markdown
    return markdownify.markdownify(rendered)
|
||
def _build_category_page(title, query, category_name):
    """
    Fetch the articles of one category, save its plot and write its markdown page
    Args:
        title (str): human-readable title of the category
        query (str): search query of the category
        category_name (str): name used as DIC key and in the output file names
    Returns:
        None
    """
    ################################
    ## Fetch the most cited articles
    data = fetch_articles(query)
    DIC[category_name] = {'title': title, 'query': query, 'most_cited_articles': data}
    plot = utils.metrics_over_time(data, category_name, title)
    plot.savefig(f'../../docs/assets/{category_name}.png')
    # utils.metrics_over_time returns the pyplot module; close the figure
    # so open figures do not pile up across categories
    plot.close()
    ################################
    ## Fetch the most recent articles
    data = fetch_articles(query, sort='publicationDate:desc')
    DIC[category_name]['most_recent_articles'] = data
    markdown_text = create_template("category.txt", category_name)
    # Add the hide navigation
    markdown_text = "---\nhide:\n - navigation\n---\n" + markdown_text
    # Write the markdown text to a file
    with open(f'../../docs/{category_name}.md', 'w', encoding='utf-8') as file:
        file.write(markdown_text)


def main():
    """
    Build one page per category listed in ../data/query.tsv, then an 'All'
    page whose query is the union of every category query
    Args:
        None
    Returns:
        None
    """
    # Read all the categories first so the file is not held open
    # during the (potentially long) network requests
    categories = []
    with open('../data/query.tsv', 'r', encoding='utf-8') as f:
        for line in f:
            fields = line.rstrip('\n').split('\t')
            # Skip the header row and blank or incomplete lines
            if len(fields) < 2 or fields[0] == 'Title':
                continue
            title = fields[0]
            query = fields[1].rstrip()
            if not query:
                continue
            print(fields)
            categories.append((title, query))

    for title, query in categories:
        _build_category_page(title, query, title.replace(' ', '_'))

    # Nothing to combine when the file lists no category
    if not categories:
        return
    # Combined page: ' | ' joins the individual category queries
    all_query = ' | '.join(query for _, query in categories)
    _build_category_page('All', all_query, 'All')
|
||
if __name__ == '__main__':
    # Run the main function only when this file is executed as a script,
    # not when it is imported as a module
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
#!/usr/bin/env python3 | ||
|
||
''' | ||
Utility functions for plotting article and citation metrics. | ||
''' | ||
|
||
import matplotlib.pyplot as plt | ||
import pandas as pd | ||
|
||
def metrics_over_time(data, category_name, title):
    """
    Plot the number of articles and the number of citations per publication year
    Args:
        data (list): list of dictionaries; 'publicationDate' and 'citationCount' are read
        category_name (str): category name (not used by the plot)
        title (str): title of the graph (not used by the plot)
    Returns:
        module: matplotlib.pyplot, with the new figure as the current figure,
        so the caller can call savefig on the returned value
    Example:
        data = [
            {
                'title': 'title1',
                'publicationDate': '2020-01-01',
                'citationCount': 10
            },
            {
                'title': 'title2',
                'publicationDate': '2020-01-01',
                'citationCount': 10
            }
        ]
    """
    # Aggregate article and citation counts per publication year
    yearly = {}
    for paper in data:
        publication_date = paper.get('publicationDate')
        # Papers without a publication date cannot be placed on the time axis
        if not publication_date:
            continue
        year = publication_date.split('-')[0]
        stats = yearly.setdefault(year, {'num_articles': 0, 'num_citations': 0})
        stats['num_articles'] += 1
        # A missing or empty citation count contributes nothing
        stats['num_citations'] += paper.get('citationCount') or 0

    # Articles go on the left y-axis, citations on a second y-axis sharing x.
    # The twin is created from ax explicitly rather than from the implicit
    # current axes.
    _, ax = plt.subplots()
    ax2 = ax.twinx()
    # pandas refuses to plot an empty frame, so only draw when there is data;
    # with no data the caller still gets an empty, labelled figure to save
    if yearly:
        df = pd.DataFrame(yearly).T
        # Make another column for the year
        df['Year'] = df.index
        # Sort by year
        df = df.sort_values(by='Year', ascending=True)
        df.plot(x='Year', y='num_articles', kind='line', color='b', ax=ax, legend=False)
        df.plot(x='Year', y='num_citations', kind='line', color='r', ax=ax2, legend=False)
        # One shared legend for both lines, placed at the top of the figure
        ax.figure.legend(loc='upper center', ncol=2)
    ax.set(xlabel='Year', ylabel='Number of Articles')
    ax2.set(ylabel='Number of Citations')
    # Set grid lines with a dashed style, thickness of 0.5, color grey and transparency of 0.5
    # and only vertical lines
    ax.grid(axis='x', linestyle='--', linewidth=0.5, color='grey', alpha=0.5)
    # Remove the top spines
    ax.spines['top'].set_visible(False)
    ax2.spines['top'].set_visible(False)
    # Make sure the figure doesn't get cut off
    plt.tight_layout()
    return plt
|
||
|
||
#A26, B1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
Title Query | ||
Neural ODEs (neural ordinary differential equation) | (neural ODE) | (graph neural differential equation) | (graph neural diffusion) | (graph neural ODEs) | ||
Physics-informed GNNs (graph networks) | (physics constrain) | (learned simulator) | (learned simulation) | ||
Symbolic regression ((symbolic regression) + dynamics) | ||
PINNs (physics-informed neural computing) | ||
Latent Space Simulator (VAMP) | (latent space simul*) | (decomposition of koopman operator) | (time-lagged autoencoder) | ||
Koopman Theory (koopman*) | (transformations in hilbert space) | (linear transformation of PDEs) | (regularization of physics-informed machine learning) |
Oops, something went wrong.