Skip to content

Commit

Permalink
fix: snippet to update paper ids, and snippet to fix author name
Browse files Browse the repository at this point in the history
  • Loading branch information
gurdeep330 committed Jun 7, 2024
1 parent 5f97153 commit ee2b1d6
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 2 deletions.
16 changes: 15 additions & 1 deletion app/code/literature_fetch_recommendation_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,11 @@ def create_template(template,
if len(paper_obj.authors) > 0:
continue
for author in paper_data['authors']:
# if the author id is None, set it to the name
if author['authorId'] is None:
author['authorId'] = author['name']
print (f'Author ID is None for {author["name"]}. Setting it to the name.')
# Add the author to the article
paper_obj.authors.append(
Author(author['authorId'],
author_name=author['name']))
Expand All @@ -132,6 +137,11 @@ def create_template(template,
# rec_paper_obj.add_paper_details(rec_paper_data)
##
for author in rec_paper_data['authors']:
# if the author id is None, set it to the name
if author['authorId'] is None:
author['authorId'] = author['name']
print (f'Author ID is None for {author["name"]}. Setting it to the name.')
# Add the author to the article
rec_paper_obj.authors.append(
Author(author['authorId'],
author_name=author['name']))
Expand Down Expand Up @@ -180,6 +190,11 @@ def create_template(template,
paper_obj = Article(paper_data['paperId'])
utils.add_paper_details(paper_obj, paper_data)
for author in paper_data['authors']:
# if the author id is None, set it to the name
if author['authorId'] is None:
author['authorId'] = author['name']
print (f'Author ID is None for {author["name"]}. Setting it to the name.')
# Add the author to the article
paper_obj.authors.append(
Author(author['authorId'],
author_name=author['name']))
Expand All @@ -188,7 +203,6 @@ def create_template(template,
# Get the metrics over time
df = utils.metrics_over_time_js(topic_obj.paper_ids['recommended'])
authors_ids = topic_obj.get_all_authors_ids() # Get all the authors of the articles
# print (authors_ids)
author_details = utils.get_author_details(authors_ids) # Get the details of the authors
for article_type in topic_obj.paper_ids:
for article_id, article_obj in topic_obj.paper_ids[article_type].items():
Expand Down
34 changes: 33 additions & 1 deletion app/code/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
'''

import sys
import re
import matplotlib.pyplot as plt
import pandas as pd
import yaml
Expand Down Expand Up @@ -47,6 +48,21 @@ def update_paper_details(topic_obj):
all_paper_ids += list(topic_obj.paper_ids['negative'].keys())
all_paper_ids = list(set(all_paper_ids))
all_paper_data = get_paper_details(all_paper_ids)
# Check whether each requested paper id matches the id in the returned paper data
# If not, re-key the stored entry under the paper id returned with the data
for paper_id, paper_data in zip(all_paper_ids, all_paper_data):
if paper_id == paper_data['paperId']:
continue
print (f'Paper ID {paper_id} does not match {paper_data["paperId"]}.\
Changing the paper ID.')
if paper_id in topic_obj.paper_ids['positive']:
# change the paper id in the positive articles
topic_obj.paper_ids['positive'][paper_data['paperId']] = \
topic_obj.paper_ids['positive'].pop(paper_id)
elif paper_id in topic_obj.paper_ids['negative']:
# change the paper id in the negative articles
topic_obj.paper_ids['negative'][paper_data['paperId']] = \
topic_obj.paper_ids['negative'].pop(paper_id)
return all_paper_data

def add_paper_details(article_obj, article_data):
Expand Down Expand Up @@ -81,6 +97,8 @@ def update_h_index(article_obj, dic):
author.h_index = row['hIndex']
author.name = row['name']
author.citation_count = row['citationCount']
if row['hIndex'] is None:
continue
authors_h_index_list.append(row['hIndex'])
if len(authors_h_index_list) == 0:
authors_avg_h_index = 0
Expand Down Expand Up @@ -215,7 +233,7 @@ def get_paper_details(paper_ids, fields=FIELDS):
status_code = search_response.status_code
return search_response.json()

def get_author_details(authors_ids):
def get_author_details(all_authors_ids):
"""
Get the author details
Expand All @@ -225,6 +243,19 @@ def get_author_details(authors_ids):
Returns:
authors_details (list): list of authors details
"""
# Some authors have no ID assigned; in that case their name is used as their ID
# Exclude such authors from the lookup below and prepare their output entries up front
author_details_wo_id = []
authors_ids = []
for author_id in all_authors_ids:
# check if the author id consists only of ASCII letters and spaces (i.e. it is a name, not a real ID)
if re.fullmatch(r'[A-Za-z ]+', author_id):
author_details_wo_id.append({'authorId': author_id,
'hIndex': None,
'name': author_id,
'citationCount': None})
continue
authors_ids.append(author_id)
# Loop over every 1000 authors
authors_details = []
for start_index in range(0, len(authors_ids), 1000):
Expand Down Expand Up @@ -252,6 +283,7 @@ def get_author_details(authors_ids):
search_response.json())
sys.exit()
authors_details += search_response.json()
authors_details += author_details_wo_id
return authors_details

def metrics_over_time_js(data) -> plt:
Expand Down

0 comments on commit ee2b1d6

Please sign in to comment.