Skip to content

Commit

Permalink
add new features to the pages
Browse files Browse the repository at this point in the history
  • Loading branch information
gurdeep330 committed Mar 10, 2024
1 parent 3f5c48c commit 26f14a1
Show file tree
Hide file tree
Showing 13 changed files with 864 additions and 11,042 deletions.
85 changes: 47 additions & 38 deletions app/code/literatureFetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,27 @@
and retrieve their details.
'''

import requests
import time
import requests
from jinja2 import Environment, FileSystemLoader
import markdownify
import utils

# Define the paper search endpoint URL
URL = 'https://api.semanticscholar.org/graph/v1/paper/search/bulk'
# Define the required query parameter and its value
# (in this case, the keyword we want to search for)
FIELDS = 'paperId,url,authors,journal,title,'
FIELDS += 'publicationTypes,publicationDate,citationCount,'
FIELDS += 'publicationVenue'
BASE_PARAMS = {
# 'limit': 5,
'publicationTypes': 'JournalArticle',
# 'year': '2020-',
'fields': 'paperId,url,journal,title,publicationTypes,publicationDate,citationCount,publicationVenue',
'fields': FIELDS,
# 'sort': 'citationCount:desc',
'token': None
}
N = 100
N = 7
DIC = {}

def fetch_articles(search_query,
Expand All @@ -42,27 +44,15 @@ def fetch_articles(search_query,

fetched_data = []
while True:
status_code_429 = 0
while True:
# Make the GET request to the paper search endpoint with the URL and query parameters
search_response = requests.get(URL, params=query_params, timeout=None)
# WHen the status code is 429, sleep for 5 minutes
# if search_response.status_code == 429:
# status_code_429 += 1
# if status_code_429 > 10:
# print ('Too many requests!')
# print ('Sleeping for 5 minutes and 10 seconds....')
# time.sleep(310)
# continue
# When the status code is 200, break the loop
if search_response.status_code != 200:
print ('status code', search_response.status_code)
print ('Retrying....')
# print ('Sleeping for 3 seconds....')
# time.sleep(3)
else:
break

search_response_json = search_response.json()
fetched_data += search_response_json['data']
# End the loop if we have fetched enough data
Expand All @@ -73,22 +63,37 @@ def fetch_articles(search_query,
# if the token is not None
if search_response_json['token'] is not None:
query_params['token'] = search_response_json['token']

# fix the publicationVenue and journal
for paper in fetched_data:
# print (paper)
publication_venue = paper['publicationVenue']
if publication_venue is None:
paper['publicationVenue'] = 'NA'
elif 'name' in publication_venue:
paper['publicationVenue'] = publication_venue['name']
else:
paper['publicationVenue'] = ''
# if publication_venue is None:
# paper['publicationVenue'] = 'NotAvbl'
# elif 'name' in publication_venue:
# paper['publicationVenue'] = publication_venue['name']
# else:
# paper['publicationVenue'] = 'NotAvbl'
paper['publicationVenue'] = publication_venue.get('name', 'NotAvbl')\
if publication_venue is not None else 'NotAvbl'
journal = paper['journal']
if journal is None:
paper['journal'] = 'NA'
elif 'name' in journal:
paper['journal'] = journal['name']
paper['journal'] = journal.get('name', 'NotAvbl') if journal is not None else 'NotAvbl'

# if journal is None:
# paper['journal'] = 'NotAvbl'
# elif 'name' in journal:
# paper['journal'] = journal['name']
# else:
# paper['journal'] = 'NotAvbl'
# Set journal to publicationVenue if journal is not available
if paper['journal'] == 'NotAvbl':
paper['journal'] = paper['publicationVenue']
# Set authors to NotAvbl if authors are not available
authors = []
for author in paper['authors']:
authors.append(author['name'])
if len(authors) == 0:
authors = ['NotAvbl']
paper['authors'] = ', '.join(authors)
return fetched_data

def create_template(template_file, category_name, df, dic_all_citations=None) -> str:
Expand Down Expand Up @@ -118,8 +123,11 @@ def create_template(template_file, category_name, df, dic_all_citations=None) ->
continue
categories.append(category)
num_citations_across_categories.append(num_citations_categories)

# Render the template
content = template.render(
# current time
current_time=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
most_cited_articles=DIC[category_name]['most_cited_articles'][0:N],
most_recent_articles=DIC[category_name]['most_recent_articles'][0:N],
category_name=category_name,
Expand Down Expand Up @@ -150,17 +158,17 @@ def main():
for line in f:
if line.split('\t')[0] == 'Title':
continue
print (line.split('\t'))
title = line.split('\t')[0]
query = line.split('\t')[1].rstrip()
print (f'Title: {title}\nQuery: {query}')
category_name = title.replace(' ', '_')
################################
## Fetch the most cited articles
data = fetch_articles(query)
DIC[category_name] = {'title': title, 'query': query, 'most_cited_articles': data}
# plot = utils.metrics_over_time_js(data, category_name, title)
# plot.savefig(f'../../docs/assets/{category_name}.png')
df = utils.metrics_over_time_js(data, category_name, title)
df = utils.metrics_over_time_js(data)
################################
## Fetch the most recent articles
data = fetch_articles(query, sort = 'publicationDate:desc')
Expand All @@ -177,31 +185,32 @@ def main():
################################

# End of file
title = 'All'
title = 'Overview'
query = ' | '.join([category_items['query'] for _, category_items in DIC.items()])
category_name = 'All'
category_name = 'Overview'
################################
## Fetch the most cited articles
data = fetch_articles(query)
# print (data)
DIC[category_name] = {'title': title, 'query': query, 'most_cited_articles': data}
# plot = utils.metrics_over_time_js(data, category_name, title)
# plot.savefig(f'../../docs/assets/{category_name}.png')
df = utils.metrics_over_time_js(data, category_name, title)
df = utils.metrics_over_time_js(data)
################################
## Fetch the most recent articles
data = fetch_articles(query, sort = 'publicationDate:desc')
DIC[category_name]['most_recent_articles'] = data
# print (data[0])
# print (data[6])
# Make bar plot for the number of citations of top 100 articles
# in each category
dic_all_citations = utils.all_citations_js(DIC)
markdown_text = create_template("all.txt", category_name, df, dic_all_citations=dic_all_citations)
markdown_text = create_template("overview.txt", category_name, df, dic_all_citations=dic_all_citations)
# DIC[category_name]['most_cited_articles'][0:N],
# DIC[category_name]['most_recent_articles'][0:N])
# Add the hide navigation
markdown_text = "---\nhide:\n - navigation\n---\n" + markdown_text
# Write the markdown text to a file
with open(f'../../docs/index.md', 'w', encoding='utf-8') as file:
with open('../../docs/index.md', 'w', encoding='utf-8') as file:
file.write(markdown_text)
################################
# Read YAML file
Expand All @@ -211,14 +220,14 @@ def main():
# Add more stuff to the YAML data
data['nav'] = []
for category_name, category_items in DIC.items():
if category_name == 'All':
if category_name == 'Overview':
data['nav'].append({category_items['title']: 'index.md'})
else:
data['nav'].append({category_items['title']: category_name + '.md'})

# reverser the order so that All appears first
# reverser the order so that Overview appears first
data['nav'] = data['nav'][::-1]
print (data['nav'])
# print (data['nav'])
# Write modified YAML data back to file
utils.write_yaml(data, '../../mkdocs.yml')

Expand Down
39 changes: 29 additions & 10 deletions app/code/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,12 @@
import pandas as pd
import yaml

def metrics_over_time(data, category_name, title) -> plt:
def metrics_over_time(data) -> plt:
"""
Return the metrics over time
Args:
data (list): list of dictionaries
category_name (str): category name
title (str): title of the graph
Returns:
None
Expand Down Expand Up @@ -74,11 +72,9 @@ def metrics_over_time(data, category_name, title) -> plt:
plt.tight_layout()
# Save the graph
return plt


#A26, B1

def metrics_over_time_js(data, category_name, title) -> plt:
def metrics_over_time_js(data) -> plt:
"""
Return the metrics over time
Expand Down Expand Up @@ -136,8 +132,6 @@ def all_citations_js(dic) -> list:
Returns:
dic_all_citations (dict): dictionary with the number of citations for all categories
"""
categories = []
num_citations = []
dic_all_citations = {}
for category_name, category_name_items in dic.items():
sum_citations = 0
Expand All @@ -152,11 +146,36 @@ def all_citations_js(dic) -> list:

# Function to read YAML file
def read_yaml(file_path):
with open(file_path, 'r') as file:
'''
Function to read YAML file
Args:
file_path (str): path to the YAML file
Returns:
dict: data from the YAML file
Example:
data = read_yaml('data.yaml')
'''
with open(file_path, 'r', encoding='utf-8') as file:
data = yaml.safe_load(file)
return data

# Function to write YAML file
def write_yaml(data, file_path):
with open(file_path, 'w') as file:
'''
Function to write YAML file
Args:
data (dict): data to write to the YAML file
file_path (str): path to the YAML file
Returns:
None
Example:
write_yaml(data, 'data.yaml')
'''
with open(file_path, 'w', encoding='utf-8') as file:
yaml.dump(data, file, default_flow_style=False)
Loading

0 comments on commit 26f14a1

Please sign in to comment.