Skip to content

Commit

Permalink
Merge pull request #14 from VirtualPatientEngine/develop
Browse files Browse the repository at this point in the history
Feat: added curated literature and additional reading tables
  • Loading branch information
lilijap authored Mar 19, 2024
2 parents 8282e2c + f688c41 commit 03819db
Show file tree
Hide file tree
Showing 17 changed files with 337 additions and 13,778 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
- name: Run pytest
shell: bash -l {0}
run: |
pytest
pytest --cache-clear
# pylint
- name: Run pylint
Expand Down Expand Up @@ -76,7 +76,7 @@ jobs:
run: pip3 install -r requirements.txt # Adjust this according to your project

- name: Run tests with coverage
run: coverage run -m pytest
run: coverage run -m pytest --cache-clear
continue-on-error: true

- name: Check coverage
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,9 @@ dmypy.json
# Cython debug symbols
cython_debug/

# .md files in the docs folder of mkdocs
docs/*.md

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
Expand Down
126 changes: 110 additions & 16 deletions app/code/literature_fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,35 @@
N = 100
DIC = {}

def _flatten_name(value) -> str:
    """Return the display name for a venue/journal value, or 'NotAvbl'."""
    # Already flattened by an earlier pass: keep it as is.
    if isinstance(value, str):
        return value
    if value is None:
        return 'NotAvbl'
    # A present-but-empty name is treated the same as a missing one.
    return value.get('name') or 'NotAvbl'


def fix_publication_venue(artciles_data) -> list:
    """
    Flatten the publication venue, journal and authors of every article.

    Each article dict is modified in place: 'publicationVenue' and 'journal'
    are replaced by their 'name' (or 'NotAvbl'), 'journal' falls back to the
    venue when it has no name, and 'authors' becomes one comma-separated
    string (or 'NotAvbl' when there are no named authors). Missing keys are
    treated as missing values, and articles that were already flattened are
    left unchanged, so the function can safely be applied more than once.

    Args:
        artciles_data (list): list of article dicts
    Returns:
        list: the same list, with every article flattened
    """
    for paper in artciles_data:
        paper['publicationVenue'] = _flatten_name(paper.get('publicationVenue'))
        paper['journal'] = _flatten_name(paper.get('journal'))
        # Set journal to publicationVenue if journal is not available
        if paper['journal'] == 'NotAvbl':
            paper['journal'] = paper['publicationVenue']

        authors = paper.get('authors')
        if isinstance(authors, str):
            # Authors were joined by a previous pass.
            continue
        # Drop authors without a usable name so the join cannot fail.
        names = [author['name'] for author in authors or [] if author.get('name')]
        # Set authors to NotAvbl if authors are not available
        paper['authors'] = ', '.join(names) if names else 'NotAvbl'

    return artciles_data

def fetch_articles(search_query,
sort='citationCount:desc') -> list:
"""
Expand Down Expand Up @@ -58,23 +87,73 @@ def fetch_articles(search_query,
# or if there is no more data to fetch
if len(fetched_data) >= N:
break

# fix the publicationVenue and journal
fetched_data = fix_publication_venue(fetched_data)
return fetched_data

def fetch_manually_curated_articles(curated_file) -> list:
    """
    Return the manually curated articles

    Reads a tab-separated file, collects the Semantic Scholar paper id found
    at the end of the link in the fourth column of every usable row, fetches
    all papers with one batch request and tags each paper with the topic
    (first column) of the first row that referenced it.

    Args:
        curated_file (str): tab-separated file with the curated articles
    Returns:
        list: list of articles, flattened by fix_publication_venue and
            carrying an extra 'topic' key
    """
    # Read the manually curated articles: paper id -> topic
    s2_ids = {}
    with open(curated_file, 'r', encoding='utf-8') as manual_f:
        for manual_line in manual_f:
            columns = [column.strip() for column in manual_line.split('\t')]
            # Blank or truncated rows carry no link; skip them
            if len(columns) < 4:
                continue
            # Header row
            if 'category' in columns[0]:
                continue
            # Rows flagged as not available
            if columns[2] == 'NA':
                continue
            # The paper id is the last path segment of the link, without
            # any query string
            paper_id = columns[3].split('/')[-1].split('?')[0]
            # Keep the topic of the first row that mentions a paper
            s2_ids.setdefault(paper_id, columns[0])

    # Nothing to look up: avoid posting an empty id list
    if not s2_ids:
        return []

    endpoint = 'https://api.semanticscholar.org/graph/v1/paper/batch'
    params = {'fields': FIELDS}
    payload = {'ids': list(s2_ids)}
    while True:
        # Make a POST request to the paper search batch
        # endpoint with the URL; retry until it succeeds
        search_response = requests.post(endpoint, params=params, json=payload, timeout=None)
        if search_response.status_code == 200:
            break

    # Only the successful response is used. Null entries are dropped
    # (assumption: the batch endpoint returns null for ids it cannot
    # resolve - confirm against the API reference).
    fetched_data = [paper for paper in search_response.json() if paper is not None]
    # Add the topic to the fetched data
    for paper in fetched_data:
        paper['topic'] = s2_ids[paper['paperId']]
    # fix the publicationVenue and journal
    return fix_publication_venue(fetched_data)

def fetch_additional_reading(additional_reading_file) -> list:
    """
    Fetch the additional reading list

    Each usable row of the tab-separated file contributes one entry built
    from its first three columns, with surrounding whitespace removed.
    The header row (first column containing 'Category') and rows with
    fewer than three columns, such as blank lines, are skipped.

    Args:
        additional_reading_file (str): file with the additional reading list
    Returns:
        list: list of dicts with the keys 'topic', 'name' and 'link'
    """
    # Read the additional reading list
    fetched_data = []
    with open(additional_reading_file, 'r', encoding='utf-8') as manual_f:
        for additional_reading_line in manual_f:
            # Split once and strip every column
            columns = [column.strip() for column in additional_reading_line.split('\t')]
            if len(columns) < 3:
                continue
            topic, name, link = columns[:3]
            # Header row
            if 'Category' in topic:
                continue
            fetched_data.append({'topic': topic, 'name': name, 'link': link})

    return fetched_data

def create_template(template, topic, dic, df, dic_all_citations=None) -> str:
Expand Down Expand Up @@ -107,7 +186,11 @@ def create_template(template, topic, dic, df, dic_all_citations=None) -> str:
if dic_all_citations is None:
categories = None
num_citations_across_categories = None
manually_curated_articles = None
additional_reading = None
else:
manually_curated_articles = dic[topic]['manually_curated_articles']
additional_reading = dic[topic]['additional_reading']
categories = []
num_citations_across_categories = []
for category, num_citations_categories in dic_all_citations.items():
Expand All @@ -121,6 +204,8 @@ def create_template(template, topic, dic, df, dic_all_citations=None) -> str:
current_time=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
most_cited_articles=dic[topic]['most_cited_articles'][0:N],
most_recent_articles=dic[topic]['most_recent_articles'][0:N],
manually_curated_articles=manually_curated_articles,
additional_reading=additional_reading,
category_name=topic,
title=dic[topic]['title'],
query=dic[topic]['query'],
Expand Down Expand Up @@ -187,7 +272,16 @@ def create_template(template, topic, dic, df, dic_all_citations=None) -> str:
## Fetch the most recent articles
data = fetch_articles(QUERY, sort = 'publicationDate:desc')
DIC[TOPIC]['most_recent_articles'] = data
# print (data[6])
################################
## Fetch the manually curated articles
MANUALLY_CURATED_FILE = '../data/manually_curated_articles.tsv'
data = fetch_manually_curated_articles(MANUALLY_CURATED_FILE)
DIC[TOPIC]['manually_curated_articles'] = data
################################
## Fetch the additional reading list
ADDITIONAL_READING_FILE = '../data/additional_reading.tsv'
data = fetch_additional_reading(ADDITIONAL_READING_FILE)
DIC[TOPIC]['additional_reading'] = data
# Make bar plot for the number of citations of top 100 articles
# in each category
DIC_ALL_CITATIONS = utils.all_citations_js(DIC)
Expand Down
17 changes: 17 additions & 0 deletions app/data/additional_reading.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
Category Name URL
Examples of clinical trials in progress Randomized Controlled Trial of Digital Twin Precision Treatment: A Novel Whole Body Digital Twin Enabled Precision Treatment for Type 2 Diabetes https://clinicaltrials.gov/study/NCT05181449
Examples of clinical trials in progress  Validation of a Digital Twin Performing Strength Training https://clinicaltrials.gov/study/NCT04849923
Examples of clinical trials that have completed	Feasibility Studies of Personalized Closed Loop	https://clinicaltrials.gov/study/NCT04203823
Examples of clinical trials that have completed	Digital Twin - Modelling Postprandial Triglyceride and Glucose Responses	https://clinicaltrials.gov/study/NCT05313594
Quantitative Systems Pharmacology (QSP) best practices Applied Concepts in PBPK Modeling: How to Build a PBPK/PD Model (2016) https://www.semanticscholar.org/paper/Applied-Concepts-in-PBPK-Modeling%3A-How-to-Build-a-Kuepfer-Niederalt/ffa29f28ca8202c19505d682ac709f4ca69710e2?utm_source=direct_link
Quantitative Systems Pharmacology (QSP) best practices Current practices for QSP model assessment: an IQ consortium survey (2022) https://www.semanticscholar.org/paper/Current-practices-for-QSP-model-assessment%3A-an-IQ-Chan-Allen/41f7a87fa5c14a203b01c65a0cf351bb16f9a88a?utm_source=direct_link
Quantitative Systems Pharmacology (QSP) best practices Towards a comprehensive assessment of QSP models: what would it take? (2022) https://www.semanticscholar.org/paper/Towards-a-comprehensive-assessment-of-QSP-models%3A-Androulakis/4030dd410fc9ccde7bfe33a268ac6c71b119a8dd?utm_source=direct_link
Quantitative Systems Pharmacology (QSP) best practices Virtual Populations for Quantitative Systems Pharmacology Models (2022) https://www.semanticscholar.org/paper/Virtual-Populations-for-Quantitative-Systems-Cheng-Straube/68337aa80c1390201d9ea07d9ae681ebb37bfaa5?utm_source=direct_link
Quantitative Systems Pharmacology (QSP) best practices FDA-Industry Scientific Exchange on assessing quantitative systems pharmacology models in clinical drug development: a meeting report, summary of challenges/gaps, and future perspective (2021) https://www.semanticscholar.org/paper/FDA-Industry-Scientific-Exchange-on-assessing-in-a-Bai-Schmidt/9a319db9acb5786b324117b7321e455fda3dce62?utm_source=direct_link
Regulatory guidance	Guidance for Industry and Food and Drug Administration Staff 2023 Assessing the Credibility of Computational Modeling and Simulation in Medical Device Submissions	https://www.fda.gov/regulatory-information/search-fda-guidance-documents/assessing-credibility-computational-modeling-and-simulation-medical-device-submissions
Regulatory guidance	Draft Guidance for Industry 2022 The Use of Physiologically Based Pharmacokinetic Analyses — Biopharmaceutics Applications for Oral Drug Product Development, Manufacturing Changes, and Controls	https://www.fda.gov/regulatory-information/search-fda-guidance-documents/use-physiologically-based-pharmacokinetic-analyses-biopharmaceutics-applications-oral-drug-product
Regulatory guidance 2018 Physiologically Based Pharmacokinetic Analyses — Format and Content Guidance for Industry https://www.fda.gov/regulatory-information/search-fda-guidance-documents/physiologically-based-pharmacokinetic-analyses-format-and-content-guidance-industry
Regulatory guidance	Guidance for Industry 2023 Evaluation of Gastric pH-Dependent Drug Interactions With Acid-Reducing Agents: Study Design, Data Analysis, and Clinical Implications Guidance for Industry	https://www.fda.gov/regulatory-information/search-fda-guidance-documents/evaluation-gastric-ph-dependent-drug-interactions-acid-reducing-agents-study-design-data-analysis
Regulatory guidance Guidance for Industry 2023 Population Pharmacokinetics https://www.fda.gov/regulatory-information/search-fda-guidance-documents/population-pharmacokinetics
Regulatory guidance Guidance for Industry and Food and Drug Administration Staff 2023 Assessing the Credibility of Computational Modeling and Simulation in Medical Device Submissions https://www.fda.gov/regulatory-information/search-fda-guidance-documents/assessing-credibility-computational-modeling-and-simulation-medical-device-submissions
Regulatory guidance Guidance for Industry and Food and Drug Administration Staff 2016 Reporting of Computational Modeling Studies in Medical Device Submissions https://www.fda.gov/regulatory-information/search-fda-guidance-documents/reporting-computational-modeling-studies-medical-device-submissions
Loading

0 comments on commit 03819db

Please sign in to comment.