diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 0000000..77bc094
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1 @@
+buymeacoffee: fernandowip
\ No newline at end of file
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 618bf7e..888f78b 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,10 +1,11 @@
name: Test SortGS
-
on:
push:
branches: [ master, dev ]
pull_request:
branches: [ master, dev ]
+ schedule:
+ - cron: '0 0 1 * *' # Run at 00:00 on the first of every month
jobs:
build:
@@ -13,7 +14,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version: [3.7, 3.8, 3.9, '3.10', '3.11']
+ python-version: ['3.10', '3.11']
steps:
- uses: actions/checkout@v2
@@ -21,10 +22,6 @@ jobs:
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install -r requirements.txt
- name: Install the package
run: pip install -e .
- name: Test with unittest
diff --git a/README.md b/README.md
index 2f2841f..37aa4e4 100644
--- a/README.md
+++ b/README.md
@@ -3,12 +3,20 @@
sortgs is a Python tool for ranking Google Scholar publications by the number of citations. It is useful for finding relevant papers in a specific field. The data acquired from Google Scholar includes Title, Citations, Links, Rank, and a new column with the number of citations per year. In the background, it first try to fetch results using python requests. If it fails, it will use selenium to fetch the results.
-## Try on Google Colab: [](https://colab.research.google.com/github/WittmannF/sort-google-scholar/blob/master/examples/run_sortgs_on_colab.ipynb)
- - No install requirements! Limitations: Can't handle robot checking, so use it carefully.
+## 🚀 Run it on Google Colab
+- **No-Code Version (new!):** [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/WittmannF/sort-google-scholar/blob/master/examples/Sort_Google_Scholar_No_Code_Version.ipynb) — *No coding required! Perfect for a quick start!* ⚡
+- **Code Version:** [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/WittmannF/sort-google-scholar/blob/master/examples/run_sortgs_on_colab.ipynb) — *For developers who want full control of what's behind the scenes!* 💻
+
+> 💡 **All you need** is a Google Account to get started.
+> ⚠️ **Note**: Google Scholar may block access after too many repetitive requests due to CAPTCHA checks, so proceed mindfully!
+
+## 📚 Colab No-Code Instructions
+https://github.com/user-attachments/assets/25de7bad-2a5d-4bcf-b486-faa1d7a29eb3
+
## Installation
-You can now install `sortgs` directly using `pip`:
+You can install `sortgs` directly using `pip`:
```bash
pip install sortgs
@@ -166,3 +174,9 @@ And check if all tests passes. Alternativelly send a PR, github actions will run
## LICENSE
- MIT
+
+## Support My Work
+If you find this project useful, consider supporting me:
+
+[![Buy Me a Coffee](https://img.shields.io/badge/-Buy%20Me%20a%20Coffee-ffdd00?style=flat&logo=buy-me-a-coffee&logoColor=black)](https://buymeacoffee.com/fernandowip)
+
diff --git a/setup.py b/setup.py
index 502c128..00d2773 100644
--- a/setup.py
+++ b/setup.py
@@ -10,9 +10,14 @@
long_description_content_type='text/markdown',
url='https://github.com/WittmannF/sort-google-scholar',
py_modules=['sortgs'], # Assuming your script is named sortgs.py
- install_requires=[
- # your dependencies here
+
+ install_requires=['requests',
+ 'beautifulsoup4',
+ 'pandas',
+ 'matplotlib',
+ 'selenium',
],
+
entry_points={
'console_scripts': [
'sortgs=sortgs:main', # This line sets up the command line tool
diff --git a/test/test_sortgs.py b/test/test_sortgs.py
index ccf117a..1146093 100644
--- a/test/test_sortgs.py
+++ b/test/test_sortgs.py
@@ -1,91 +1,55 @@
import unittest
-from unittest.mock import patch
-import sortgs
import os
import pandas as pd
class TestSortGS(unittest.TestCase):
@classmethod
- def setUpClass(self):
- '''run once before all tests'''
- os.system("python sortgs.py --debug --kw 'machine learning' --nresults 10 --endyear 2022")
- self.df_top_10=pd.read_csv('machine_learning.csv')
-
- os.system("python sortgs.py --debug --kw 'machine learning' --nresults 20 --endyear 2022")
- self.df_top_20=pd.read_csv('machine_learning.csv')
+ def setUpClass(cls):
+ '''Run once before all tests'''
+ os.system("sortgs 'machine learning' --debug --nresults 10 --endyear 2022")
+ cls.df_top_10_cli = pd.read_csv('machine_learning.csv')
- os.system("python sortgs.py --debug --kw 'machine learning' --nresults 20 --endyear 2022 --sortby 'cit/year'")
- self.df_top_sorted_cit_per_year=pd.read_csv('machine_learning.csv')
+ os.system("sortgs 'machine learning' --debug --nresults 10 --endyear 2022 --sortby 'cit/year'")
+ cls.df_top_sorted_cit_per_year_cli = pd.read_csv('machine_learning.csv')
- # Repeat the above, but testing the cli command
- os.system("sortgs 'machine learning' --debug --nresults 10 --endyear 2022")
- self.df_top_10_cli=pd.read_csv('machine_learning.csv')
+ def test_get_10_results_cli(self):
+ self.assertEqual(len(self.df_top_10_cli), 10)
- os.system("sortgs 'machine learning' --debug --nresults 20 --endyear 2022")
- self.df_top_20_cli=pd.read_csv('machine_learning.csv')
+ def test_is_sorted_by_citations(self):
+ df = self.df_top_10_cli
+ top_citations = list(df.Citations.values[:5])
+ self.assertEqual(top_citations, [3166, 2853, 2416, 948, 830])
- os.system("sortgs 'machine learning' --debug --nresults 20 --endyear 2022 --sortby 'cit/year'")
- self.df_top_sorted_cit_per_year_cli=pd.read_csv('machine_learning.csv')
-
- def test_get_10_results(self):
- self.assertEqual(len(self.df_top_10), 10)
-
- def test_get_20_results(self):
- self.assertEqual(len(self.df_top_20), 20)
-
- def test_is_sorted(self):
- df=self.df_top_20
- top_citations=list(df.Citations.values[:5])
- self.assertEqual(top_citations, [49230, 8603, 3166, 3069, 2853])
-
- def test_top_result(self):
- df=self.df_top_20
- top_author = str(df.Author.values[0])
+ def test_top_result_cli(self):
+ df = self.df_top_10_cli
+ top_author = str(df.Author.values[0]).strip()
top_citation = int(df.Citations.values[0])
top_cit_per_year = int(df['cit/year'].values[0])
top_results = [top_author, top_citation, top_cit_per_year]
- self.assertEqual(top_results, [' Bishop', 49230, 2896])
+ self.assertEqual(top_results, ['Shale', 3166, 352])
def test_cit_per_year_sorted(self):
- df=self.df_top_sorted_cit_per_year
- top_citations=list(df.Citations.values[:5])
+ df = self.df_top_sorted_cit_per_year_cli
top_cit_per_year = list(df['cit/year'].values[:5])
- top_results = [top_citations, top_cit_per_year]
- self.assertEqual(top_results, [[49230, 8603, 2853, 3166, 2416],
- [2896, 782, 571, 352, 302]])
+ self.assertEqual(top_cit_per_year, [571, 352, 302, 85, 79])
def test_csv_exists(self):
- os.system("python sortgs.py --debug --kw 'machine learning' --nresults 10")
self.assertTrue(os.path.exists('machine_learning.csv'))
-
- def test_cli_get_10_results(self):
- self.assertEqual(len(self.df_top_10_cli), 10)
-
- def test_cli_get_20_results(self):
- self.assertEqual(len(self.df_top_20_cli), 20)
-
- def test_cli_is_sorted(self):
- df=self.df_top_20_cli
- top_citations=list(df.Citations.values[:5])
- self.assertEqual(top_citations, [49230, 8603, 3166, 3069, 2853])
-
- def test_cli_top_result(self):
- df=self.df_top_20_cli
- top_author = str(df.Author.values[0])
- top_citation = int(df.Citations.values[0])
- top_cit_per_year = int(df['cit/year'].values[0])
- top_results = [top_author, top_citation, top_cit_per_year]
- self.assertEqual(top_results, [' Bishop', 49230, 2896])
def test_cli_cit_per_year_sorted(self):
- df=self.df_top_sorted_cit_per_year_cli
- top_citations=list(df.Citations.values[:5])
+ df = self.df_top_sorted_cit_per_year_cli
+ top_citations = list(df.Citations.values[:5])
top_cit_per_year = list(df['cit/year'].values[:5])
- top_results = [top_citations, top_cit_per_year]
- self.assertEqual(top_results, [[49230, 8603, 2853, 3166, 2416],
- [2896, 782, 571, 352, 302]])
-
+
+ # Convert np.int64 values to Python int
+ top_citations = [int(c) for c in top_citations]
+ top_cit_per_year = [int(c) for c in top_cit_per_year]
+ top_results = [top_citations, top_cit_per_year]
+ self.assertEqual(top_results, [
+ [2853, 3166, 2416, 598, 948],
+ [571, 352, 302, 85, 79]
+ ])
if __name__=='__main__':
unittest.main()
\ No newline at end of file