diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5bc7ba69..9929f0be 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -52,8 +52,8 @@ jobs: - name: Run pylint shell: bash -l {0} run: | - pylint tests - pylint app + pylint --disable=E0401,R0801,C010 tests + pylint --disable=E0401,R0801,C010 app # code coverage job for ubuntu and macos code-cov-ubuntu-macos: diff --git a/README.md b/README.md index 397f6424..8e2e1cef 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,9 @@ -[![TESTS](https://github.com/VirtualPatientEngine/demo/actions/workflows/tests.yml/badge.svg)](https://github.com/VirtualPatientEngine/demo/actions/workflows/tests.yml) -[![RELEASE](https://github.com/VirtualPatientEngine/demo/actions/workflows/release.yml/badge.svg)](https://github.com/VirtualPatientEngine/demo/actions/workflows/release.yml) +[![TESTS](https://github.com/VirtualPatientEngine/literatureSurvey/actions/workflows/tests.yml/badge.svg)](https://github.com/VirtualPatientEngine/literatureSurvey/actions/workflows/tests.yml) +[![RELEASE](https://github.com/VirtualPatientEngine/literatureSurvey/actions/workflows/release.yml/badge.svg)](https://github.com/VirtualPatientEngine/literatureSurvey/actions/workflows/release.yml) -

🚀 Demo

+

🚀 Literature Survey

-This repository serves as a template to develop a code repository (shareable packages). The followoing features are pre-embedded into this repository to help you kick start with your work: -1. **Automated Workflows for testing and Semantic Release using GitHub Actions** -2. **Automated Documentation and Hosting using MkDocs** -3. **Automated Distributable Release (.whl package) using setup.py** - -In the files ```setup.py``` and ```release.config.js```, change the package name from **gurdeepdemo** to whatever you desire. - -View the documentation of the example code used in the repo @ https://virtualpatientengine.github.io/demo +This repository serves as a template to develop a literature survey repository. +View the documentation of the example code used in the repo @ https://virtualpatientengine.github.io/literatureSurvey >NOTE: Read more about these and other features in the CodeOps and DevOps documents available on the teams channel. diff --git a/app/literatureFetch.py b/app/literatureFetch.py new file mode 100755 index 00000000..ab480c06 --- /dev/null +++ b/app/literatureFetch.py @@ -0,0 +1,137 @@ +""" +A class to extract the data from Uniprot +for a given protein +""" + +import requests +from jinja2 import Environment, FileSystemLoader + +class Arxiv: + """ + A class to extract the data from Arxiv + for a given arxiv id + """ + def __init__(self, paper_id) -> None: + """ + Initialize the class + + Args: + arxiv_id (str): arxiv id + title (str): title of the paper + published (str): published date + link (str): link to the paper + + Returns: + None + """ + self.arxiv_id = paper_id + self.semantic_scholar_data = {} + def get_semantic_scholar_data(self) -> dict: + """ + Return the semantic scholar data for a given arxiv id + + Args: + None + Returns: + dict: semantic scholar data + """ + return self.semantic_scholar_data + def get_arxiv_id(self): + """ + Return the arxiv id + + Args: + None + Returns: + str: arxiv id + """ + return self.arxiv_id + +def get_semantic_scholar(article_ids): + """ + Return the semantic scholar data for a given arxiv ids + + Args: + arxiv_ids (list): list of arxiv ids + Returns: + dict: dictionary of arxiv ids and their semantic scholar data + """ + + fields = 'referenceCount,citationCount,title,authors,' + fields += 'journal,fieldsOfStudy,publicationTypes,publicationDate,url' + r = requests.post( + 'https://api.semanticscholar.org/graph/v1/paper/batch', + params={'fields': fields}, + json={"ids": article_ids}, + timeout=10 + ) + # print(json.dumps(r.json(), indent=2)) + return r.json() + +if __name__ == "__main__": + # Run the script from command line + import markdownify + import feedparser + # Define the query + QUERY = 'stat.ML:causal+link+prediction' + # Define the base url + BASE_URL = 'http://export.arxiv.org/api/query?search_query=' + # Define the search parameters + START = 0 + MAX_RESULTS = 500 + SORT_BY = 'relevance' + SEARCH_PARAMS = f'&start={START}&max_results={MAX_RESULTS}&sortBy={SORT_BY}' + # Define the url link + URL_LINK = BASE_URL + QUERY + SEARCH_PARAMS + # Parse the url link + feed = feedparser.parse(URL_LINK) + num_entries = len(feed.entries) + arxiv_ids = [] # List to store the arxiv ids + dic_arxiv_ids = {} # Dictionary to store the arxiv ids + for i in range(num_entries): + # print (feed.entries[i]['title'], feed.entries[i]['published'], feed.entries[i]['link']) + arxiv_id = feed.entries[i]['link'].split('/')[-1].split('v')[0] + dic_arxiv_ids[arxiv_id] = Arxiv(arxiv_id) + arxiv_ids = ["ARXIV:"+arxiv_id for arxiv_id in dic_arxiv_ids] + # print (arxiv_ids) + results = get_semantic_scholar(arxiv_ids) + # print (results) + print (len(results)) + + articles = [] + for i in range(num_entries): + arxiv_id = arxiv_ids[i].split(':')[1] + print (results[i]) + if results[i] is None: + continue + dic_arxiv_ids[arxiv_id].semantic_scholar_data = results[i] + # Append the articles + if dic_arxiv_ids[arxiv_id].semantic_scholar_data is not None: + if int(dic_arxiv_ids[arxiv_id].semantic_scholar_data['citationCount']) > 1000: + # Extract author names + authors = [] + for author in dic_arxiv_ids[arxiv_id].semantic_scholar_data['authors']: + authors.append(author['name']) + articles.append({"title": results[i]['title'], + 'url': results[i]['url'], + "authors": ', '.join(authors), + "citations": results[i]['citationCount'], + 'journal': results[i]['journal'], + 'fieldsOfStudy': results[i]['fieldsOfStudy'], + 'publicationTypes': results[i]['publicationTypes'], + 'publicationDate': results[i]['publicationDate'] + }) + # Set the template environment + environment = Environment(loader=FileSystemLoader("../templates/")) + # Get the template + template = environment.get_template("message.txt") + # Render the template + content = template.render( + articles=articles, + query=QUERY, + ) + # Convert the content to markdown + markdown_text = markdownify.markdownify(content) + # Write the markdown text to a file + with open('../docs/index.md', 'w', encoding='utf-8') as file: + file.write(markdown_text) diff --git a/app/uniprot.py b/app/uniprot.py deleted file mode 100755 index 38aaa0d8..00000000 --- a/app/uniprot.py +++ /dev/null @@ -1,72 +0,0 @@ -""" -A class to extract the data from Uniprot -for a given protein -""" - -import re -import requests - -class Uniprot: - """A class to extract the data from Uniprot - for a given uniprot accession - """ - - def __init__(self, uniprot_acc: str): - """Initialize the class""" - self.uniprot_acc = uniprot_acc - - def get_uniprot_status(self) -> int: - """Get the status of the protein - - Returns: - int: status of the protein - """ - url = f"https://rest.uniprot.org/uniprotkb/{self.uniprot_acc}.fasta" - response = requests.get(url, timeout=10) - return response.status_code - - def get_protein_name(self) -> str: - """Get the name of the protein - - Returns: - str: name of the protein - """ - url = f"https://rest.uniprot.org/uniprotkb/{self.uniprot_acc}.txt" - response = requests.get(url, timeout=10) - protein_name = re.search(r"RecName: Full=(.+);", response.text).group(1) - return protein_name - - def get_protein_sequence(self) -> str: - """Get the sequence of the protein - - Returns: - str: sequence of the protein - """ - url = f"https://rest.uniprot.org/uniprotkb/{self.uniprot_acc}.fasta" - response = requests.get(url, timeout=10) - protein_sequence = "".join(response.text.split("\n")[1:]) - return protein_sequence - - def get_reactome_pathways(self) -> list: - """Get the Reactome pathways for the protein - - Returns: - str: Reactome pathways for the protein - """ - url = f"https://rest.uniprot.org/uniprotkb/{self.uniprot_acc}.txt" - response = requests.get(url, timeout=10) - # reactome_pathways = re.search(r"Reactome; (.+)", response.text) - reactome_pathways = re.findall(r"Reactome; (.+)", response.text) - return reactome_pathways - -if __name__ == "__main__": - # Run the script from command line - UNIPROT_ACC = "P0DTC2" - obj = Uniprot(UNIPROT_ACC) - status_code = obj.get_uniprot_status() - if status_code == 200: - print(obj.get_protein_name()) - print(obj.get_protein_sequence()) - print(obj.get_reactome_pathways()) - else: - print("Protein not found") diff --git a/docs/index.md b/docs/index.md index cf3f5e5c..541b98e4 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1 +1,21 @@ -# Welcome to the documentation of the demo app developed by Team VPE + + + + + + +### Query: stat.ML:causal+link+prediction + + + + +| Title | Authors | PublicationDate | #Citations | Journal/Conference | +| --- | --- | --- | --- | --- | +| [Link Prediction in Complex Networks: A Survey](https://www.semanticscholar.org/paper/8cd9aa720a3a2f9dcb52ad9eb1bf258a80ce0648) | Linyuan Lu, Tao Zhou | 2010-10-05 | 2520 | {'name': 'ArXiv', 'volume': 'abs/1010.0725'} | +| [Link Prediction Based on Graph Neural Networks](https://www.semanticscholar.org/paper/e4715a13f6364b1c81e64f247651c3d9e80b6808) | Muhan Zhang, Yixin Chen | 2018-02-27 | 1422 | {'pages': '5171-5181'} | +| [Predicting missing links via local information](https://www.semanticscholar.org/paper/76c361552181f3798a3fae7485a22a333af85047) | Tao Zhou, Linyuan Lü, Yi‐Cheng Zhang | 2009-01-05 | 1495 | {'name': 'The European Physical Journal B', 'pages': '623-630', 'volume': '71'} | +| [Variational Graph Auto-Encoders](https://www.semanticscholar.org/paper/54906484f42e871f7c47bbfe784a358b1448231f) | Thomas Kipf, M. Welling | 2016-11-21 | 2597 | {'name': 'ArXiv', 'volume': 'abs/1611.07308'} | +| [Predicting positive and negative links in online social networks](https://www.semanticscholar.org/paper/1926bad0dc2c1d302ad9a673226f8ca56869683c) | J. Leskovec, D. Huttenlocher, J. Kleinberg | 2010-03-11 | 1601 | {'name': 'ArXiv', 'volume': 'abs/1003.2429'} | + + + diff --git a/docs/literatureFetch.md b/docs/literatureFetch.md new file mode 100644 index 00000000..f4a4a705 --- /dev/null +++ b/docs/literatureFetch.md @@ -0,0 +1,3 @@ +## class Arxiv + +:::app.literatureFetch \ No newline at end of file diff --git a/docs/test_literatureFetch.md b/docs/test_literatureFetch.md new file mode 100644 index 00000000..ea34f47c --- /dev/null +++ b/docs/test_literatureFetch.md @@ -0,0 +1,3 @@ +## class Arxiv + +:::app.test_literatureFetch \ No newline at end of file diff --git a/docs/test_uniprot.md b/docs/test_uniprot.md deleted file mode 100644 index 15d9e41a..00000000 --- a/docs/test_uniprot.md +++ /dev/null @@ -1,3 +0,0 @@ -## class Uniprot - -:::tests.test_uniprot \ No newline at end of file diff --git a/docs/uniprot.md b/docs/uniprot.md deleted file mode 100644 index 01e5c5b9..00000000 --- a/docs/uniprot.md +++ /dev/null @@ -1,3 +0,0 @@ -## class UniProt - -:::app.uniprot \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index f6a4cc07..ddc25ca4 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,4 +1,4 @@ -site_name: Demo +site_name: Literature Survey plugins: - mkdocstrings: paths: ["app", "tests"] @@ -6,5 +6,7 @@ theme: name: material nav: - Home: index.md - - class UniProt: uniprot.md - - methods in tests/uniprot: test_uniprot.md + - API Reference: + - Arxiv: literatureFetch.md + - Tests: test_literatureFetch.md + \ No newline at end of file diff --git a/release.config.js b/release.config.js index a608a3d2..9c13d4cf 100644 --- a/release.config.js +++ b/release.config.js @@ -6,33 +6,22 @@ const config = { '@semantic-release/commit-analyzer', '@semantic-release/release-notes-generator', ["@semantic-release/exec", { - // "publishCmd": "echo 'RELEASE_VERSION=${nextRelease.version}' >> $GITHUB_ENV && echo ${nextRelease.version} > release_version.txt", - // "prepareCmd": "echo ${nextRelease.version} > release_version.txt ; python3 setup.py bdist_wheel --dist-dir dist/ ; mv dist/*-${nextRelease.version}-*whl dist/gurdeep-demo.whl", - // "publishCmd": "echo 'RELEASE_VERSION=${nextRelease.version}' > release_version.txt", - // "prepareCmd": "python3 setup.py bdist_wheel --dist-dir dist/", - "prepareCmd": "echo ${nextRelease.version} > release_version.txt ; python3 setup.py bdist_wheel --dist-dir dist/ ; mv dist/*-${nextRelease.version}-*whl dist/"+package_name+"-latest-py3-none-any.whl", + // "prepareCmd": "echo ${nextRelease.version} > release_version.txt ; python3 setup.py bdist_wheel --dist-dir dist/ ; mv dist/*-${nextRelease.version}-*whl dist/"+package_name+"-latest-py3-none-any.whl", }], ["@semantic-release/git", { - // "assets": ["dist/*.js", "dist/*.js.map"], - // "assets": ["dist/gurdeep-demo.whl"], - // "assets": ["dist/gurdeepdemo-${nextRelease.version}-py3-none-any.whl"], - "assets": ["dist/"+package_name+"-latest-py3-none-any.whl"], + // "assets": ["dist/"+package_name+"-latest-py3-none-any.whl"], }], - - // '@semantic-release/github' [ - "@semantic-release/github", - { - "assets": [ - { - // "path": "dist/gurdeep-demo.whl", - // "path": "dist/gurdeep-demo-${nextRelease.version}-*whl", - "path": "dist/"+package_name+"-latest-py3-none-any.whl", - "label": package_name+"-${nextRelease.version}", - "message": "chore(release): ${nextRelease.version} [skip ci]\n\n${nextRelease.notes}" - } - ] - } + // "@semantic-release/github", + // { + // "assets": [ + // { + // "path": "dist/"+package_name+"-latest-py3-none-any.whl", + // "label": package_name+"-${nextRelease.version}", + // "message": "chore(release): ${nextRelease.version} [skip ci]\n\n${nextRelease.notes}" + // } + // ] + // } ], ] }; diff --git a/templates/message.txt b/templates/message.txt new file mode 100644 index 00000000..a2b0f78c --- /dev/null +++ b/templates/message.txt @@ -0,0 +1,28 @@ + + + + + + + +

Query: {{ query }}

+ + + + + + + + + {% for article in articles %} + + + + + + + + {% endfor %} +
TitleAuthorsPublicationDate#CitationsJournal/Conference
[{{ article.title}}]({{article.url}}){{ article.authors}}{{ article.publicationDate}}{{ article.citations }}{{ article.journal }}
+ + \ No newline at end of file diff --git a/tests/test_literatureFetch.py b/tests/test_literatureFetch.py new file mode 100644 index 00000000..1b5f76f1 --- /dev/null +++ b/tests/test_literatureFetch.py @@ -0,0 +1,26 @@ +"""Test Uniprot class functions""" + +import pytest +from app import literatureFetch + +@pytest.fixture(name="inputs") +def inputs_fixture() -> literatureFetch.Arxiv: + """Return objet of Uniprot class""" + obj = literatureFetch.Arxiv("1706.03762") + return obj + +def test_get_arxiv_id(inputs) -> None: + """Test get_arxiv_data method""" + arxiv_id = inputs.get_arxiv_id() + assert arxiv_id == inputs.arxiv_id + +def test_get_semantic_scholar_data(inputs) -> None: + """Test get_semantic_scholar_data method""" + inputs.semantic_scholar_data = literatureFetch.get_semantic_scholar(["ARXIV:"+inputs.arxiv_id]) + semantic_scholar_data = inputs.get_semantic_scholar_data() + assert semantic_scholar_data[0]['title'] == "Attention is All you Need" + +def test_get_semantic_scholar(inputs) -> None: + """Test get_semantic_scholar function""" + semantic_scholar_data = literatureFetch.get_semantic_scholar(["ARXIV:"+inputs.arxiv_id]) + assert semantic_scholar_data[0]['referenceCount'] == 42 diff --git a/tests/test_uniprot.py b/tests/test_uniprot.py deleted file mode 100644 index 3338376f..00000000 --- a/tests/test_uniprot.py +++ /dev/null @@ -1,42 +0,0 @@ -"""Test Uniprot class functions""" - -import pytest -from app import uniprot - -@pytest.fixture(name="inputs") -def inputs_fixture() -> uniprot.Uniprot: - """Return objet of Uniprot class""" - obj = uniprot.Uniprot("P0DTC4") - return obj - -@pytest.fixture(name="inputs2") -def inputs2_fixture() -> uniprot.Uniprot: - """Return objet of Uniprot class""" - obj = uniprot.Uniprot("P0XXX0") - return obj - -def test_get_uniprot_status(inputs) -> None: - """Test get_uniprot_status method""" - status_code = inputs.get_uniprot_status() - assert status_code == 200 - -def test_get_uniprot_status_incorrect(inputs2) -> None: - """Test get_uniprot_status method with incorrect input""" - status_code = inputs2.get_uniprot_status() - assert status_code != 200 - -def test_get_protein_name(inputs) -> None: - """Test get_protein_name method""" - protein_name = inputs.get_protein_name() - assert protein_name == "Envelope small membrane protein {ECO:0000255|HAMAP-Rule:MF_04204}" - -def test_get_protein_sequence(inputs) -> None: - """Test get_protein_sequence method""" - protein_sequence = inputs.get_protein_sequence() - true_sequence = "MYSFVSEETGTLIVNSVLLFLAFVVFLLVTLAILTALRLCAYCCNIVNVSLVKPSFYVYSRVKNLNSSRVPDLLV" - assert protein_sequence == true_sequence - -def test_get_reactome_pathways(inputs) -> None: - """Test get_reactome_pathways method""" - reactome_pathways = inputs.get_reactome_pathways() - assert 'R-HSA-9694493; Maturation of protein E.' in reactome_pathways