Skip to content

Commit

Permalink
chore: several updates
Browse files Browse the repository at this point in the history
  • Loading branch information
gurdeep330 committed Mar 5, 2024
1 parent 5e25140 commit cf7b59f
Show file tree
Hide file tree
Showing 30 changed files with 935 additions and 166 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ on:
# Triggers the workflow on push or pull request events but only for the main branch
# push:
# branches: [ develop ]
pull_request:
branches: [ main, develop ]
# pull_request:
# branches: [ main, develop ]

# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:
Expand Down
168 changes: 168 additions & 0 deletions app/code/literatureFetch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
#!/usr/bin/env python3

'''
This script demonstrates how to use the Semantic Scholar API to search for papers
and retrieve their details.
'''

import requests
import time
from jinja2 import Environment, FileSystemLoader
import markdownify
import utils

# Define the paper search endpoint URL
# (Semantic Scholar Academic Graph bulk paper-search API).
URL = 'https://api.semanticscholar.org/graph/v1/paper/search/bulk'
# Define the required query parameter and its value
# (in this case, the keyword we want to search for)
# Base parameters shared by every request; fetch_articles() copies this
# dict and fills in 'query' and 'sort' per call.
BASE_PARAMS = {
    # 'limit': 5,
    'publicationTypes': 'JournalArticle',
    # 'year': '2020-',
    # NOTE: the backslash-continued lines below are part of the string
    # literal, so they must stay at column 0 to avoid embedding spaces.
    'fields': 'paperId,url,journal,\
title,publicationTypes,publicationDate,\
citationCount,publicationVenue',
    # 'sort': 'citationCount:desc',
    # Continuation token for paging; None requests the first page.
    'token': None
}
# Cap on how many articles each listing keeps (most cited / most recent).
N = 10
# Module-level cache: category name -> {'title', 'query',
# 'most_cited_articles', 'most_recent_articles'}; filled by main().
DIC = {}

def fetch_articles(search_query,
                   sort='citationCount:desc') -> list:
    """
    Return articles for a given query, paging through the bulk-search API.

    Args:
        search_query (str): query to search for.
        sort (str): sort order understood by the API, e.g.
            'citationCount:desc' or 'publicationDate:desc'.
    Returns:
        list: article records (dicts from the API 'data' field).
    Raises:
        requests.HTTPError: on any HTTP status other than 200 or 429
            (the original code looped forever on e.g. a 500).
    """
    query_params = BASE_PARAMS.copy()
    query_params['query'] = search_query
    query_params['sort'] = sort

    fetched_data = []
    while True:
        status_code_429 = 0
        while True:
            # Bounded timeout so a hung connection cannot block the run
            # forever (the original used timeout=None).
            search_response = requests.get(URL, params=query_params, timeout=60)
            print('status code', search_response.status_code)
            if search_response.status_code == 429:
                # Rate limited: back off before retrying. The original
                # retried the first 10 times with no delay at all,
                # hammering the endpoint.
                status_code_429 += 1
                if status_code_429 > 10:
                    print('Too many requests!')
                    print('Sleeping for 5 minutes and 10 seconds....')
                    time.sleep(310)
                else:
                    time.sleep(5)
                continue
            if search_response.status_code == 200:
                break
            # Any other status is a hard error: fail loudly instead of
            # looping forever.
            search_response.raise_for_status()
        search_response_json = search_response.json()
        fetched_data += search_response_json['data']
        # Stop once we have enough records or the API has no more pages.
        if len(fetched_data) >= N or search_response_json['token'] is None:
            break
        # The token is guaranteed non-None here; request the next page.
        query_params['token'] = search_response_json['token']
    return fetched_data

def create_template(template_file, category_name) -> str:
    """
    Render the Jinja2 template for one category and convert it to markdown.

    Args:
        template_file (str): template file name under ../../templates/.
        category_name (str): key into the module-level DIC cache.
    Returns:
        str: markdown produced from the rendered template output.
    """
    # Template environment rooted at the shared templates directory.
    environment = Environment(loader=FileSystemLoader("../../templates/"))
    template = environment.get_template(template_file)
    category = DIC[category_name]
    # NOTE(review): hide_nav looks like malformed YAML front matter (tab
    # indent, no newline before the closing '---'); main() prepends its
    # own front matter, so this value may be unused — confirm against the
    # template before relying on it.
    rendered = template.render(
        most_cited_articles=category['most_cited_articles'][0:N],
        most_recent_articles=category['most_recent_articles'][0:N],
        category_name=category_name,
        title=category['title'],
        query=category['query'],
        hide_nav="---\nhide:\n\t- navigation---\n",
    )
    return markdownify.markdownify(rendered)

def _build_category_page(category_name, title, query):
    """
    Fetch, plot, and write the markdown page for one category.

    Args:
        category_name (str): slug used for DIC key, plot, and md filenames.
        title (str): human-readable category title.
        query (str): Semantic Scholar search query for the category.
    Returns:
        None (writes ../../docs/assets/<name>.png and ../../docs/<name>.md,
        and populates DIC[category_name] as a side effect).
    """
    # Fetch the most cited articles and plot their metrics over time.
    data = fetch_articles(query)
    DIC[category_name] = {'title': title, 'query': query, 'most_cited_articles': data}
    plot = utils.metrics_over_time(data, category_name, title)
    plot.savefig(f'../../docs/assets/{category_name}.png')
    # Fetch the most recent articles.
    data = fetch_articles(query, sort='publicationDate:desc')
    DIC[category_name]['most_recent_articles'] = data
    markdown_text = create_template("category.txt", category_name)
    # Prepend the hide-navigation front matter for MkDocs.
    markdown_text = "---\nhide:\n - navigation\n---\n" + markdown_text
    with open(f'../../docs/{category_name}.md', 'w', encoding='utf-8') as file:
        file.write(markdown_text)

def main():
    """
    Build one markdown page (and metrics plot) per category listed in
    ../data/query.tsv, then a combined 'All' page whose query is the
    union ('|') of every per-category query.

    The original duplicated the whole fetch/plot/render/write pipeline
    for the 'All' page; that now lives once in _build_category_page().
    """
    with open('../data/query.tsv', 'r', encoding='utf-8') as f:
        for line in f:
            fields = line.split('\t')
            if fields[0] == 'Title':
                continue  # skip the header row
            print(fields)
            title = fields[0]
            query = fields[1].rstrip()
            category_name = title.replace(' ', '_')
            _build_category_page(category_name, title, query)

    # Combined page over every category fetched above.
    query = ' | '.join(items['query'] for items in DIC.values())
    _build_category_page('All', 'All', query)

if __name__ == '__main__':
    # Run the main function
    main()
78 changes: 78 additions & 0 deletions app/code/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#!/usr/bin/env python3

'''
script to define utility functions
'''

import matplotlib.pyplot as plt
import pandas as pd

def metrics_over_time(data, category_name, title) -> plt:
    """
    Plot articles-per-year and citations-per-year on twin y-axes.

    Args:
        data (list): paper dicts with 'publicationDate' (ISO string or
            None) and 'citationCount' keys.
        category_name (str): category name (not drawn on the plot).
        title (str): graph title (title drawing is currently disabled).
    Returns:
        the matplotlib.pyplot module, so the caller can savefig().
    Example:
        data = [
            {'title': 'title1', 'publicationDate': '2020-01-01',
             'citationCount': 10},
            {'title': 'title2', 'publicationDate': '2020-01-01',
             'citationCount': 10},
        ]
    """
    # Aggregate article and citation counts by publication year.
    per_year = {}
    for paper in data:
        pub_date = paper['publicationDate']
        if pub_date is None or pub_date == '':
            continue  # undated papers cannot be binned
        year = pub_date.split('-')[0]
        counts = per_year.setdefault(year, {'num_articles': 0, 'num_citations': 0})
        counts['num_articles'] += 1
        citations = paper['citationCount']
        if citations is None or citations == '':
            continue  # article counted above; just no citation data
        counts['num_citations'] += citations

    # One row per year, sorted chronologically (years are 4-digit strings,
    # so lexicographic order matches numeric order).
    df = pd.DataFrame(per_year).T
    df['Year'] = df.index
    df = df.sort_values(by='Year', ascending=True)

    # Articles on the left axis (blue), citations on the right (red).
    ax = df.plot(x='Year', y='num_articles', kind='line', color='b', legend=False)
    ax.set(xlabel='Year', ylabel='Number of Articles')
    ax2 = plt.twinx()
    df.plot(x='Year', y='num_citations', kind='line', color='r', ax=ax2, legend=False)
    ax2.set(ylabel='Number of Citations')
    # Single shared legend above the axes.
    ax.figure.legend(loc='upper center', ncol=2)
    # plt.title(f'{title} Articles and Citations Over Time')
    # Subtle dashed vertical gridlines only.
    ax.grid(axis='x', linestyle='--', linewidth=0.5, color='grey', alpha=0.5)
    ax.spines['top'].set_visible(False)
    ax2.spines['top'].set_visible(False)
    # Make sure the figure doesn't get cut off when saved.
    plt.tight_layout()
    return plt


#A26, B1
7 changes: 7 additions & 0 deletions app/data/query.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Title Query
Neural ODEs (neural ordinary differential equation) | (neural ODE) | (graph neural differential equation) | (graph neural diffusion) | (graph neural ODEs)
Physics-informed GNNs (graph networks) | (physics constrain) | (learned simulator) | (learned simulation)
Symbolic regression ((symbolic regression) + dynamics)
PINNs (physics-informed neural computing)
Latent Space Simulator (VAMP) | (latent space simul*) | (decomposition of koopman operator) | (time-lagged autoencoder)
Koopman Theory (koopman*) | (transformations in hilbert space) | (linear transformation of PDEs) | (regularization of physics-informed machine learning)
Loading

0 comments on commit cf7b59f

Please sign in to comment.