Add project to PyPI

- Add setup and make sortgs available via CLI - Test CLI from setup.py - Add CLI commands to test - GitHub Action to send to PyPI when merging to master - Add bump2version to manage pip versioning - Use build
WittmannF · Nov 16, 2023 · 2dad6d4 · 2dad6d4
1 parent e329efe
commit 2dad6d4
Show file tree

Hide file tree

Showing 7 changed files with 138 additions and 4 deletions.
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
@@ -0,0 +1,6 @@
+[bumpversion]
+current_version = 1.0.0
+commit = True
+tag = True
+
+[bumpversion:file:setup.py]
diff --git a/.github/workflows/deploy-to-pypi.yml b/.github/workflows/deploy-to-pypi.yml
@@ -0,0 +1,45 @@
+name: Publish Python Package to PyPI
+
+on:
+  push:
+    branches:
+      - master
+  # Deliberately no pull_request trigger: this job bumps the version and
+  # uploads to PyPI, so it must only run for pushes (merges) to master,
+  # never for pull requests opened against dev.
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+      with:
+        fetch-depth: 0  # Ensures tags are fetched
+
+    - name: Set up Python
+      uses: actions/setup-python@v2
+      with:
+        python-version: '3.x'  # Use the version appropriate for your project
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install build twine bump2version
+
+    - name: Configure Git
+      run: |
+        git config --global user.name "GitHub Actions"
+        git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
+
+    - name: Bump version and push tag
+      run: |
+        bump2version patch  # or 'minor' or 'major' depending on the release type
+        git push origin HEAD --tags  # push the bump commit too, not only the tag
+
+    - name: Build and publish
+      env:
+        TWINE_USERNAME: __token__
+        TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+      run: |
+        python -m build
+        twine upload dist/*
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -8,7 +8,6 @@ on:
 
 jobs:
   build:
-
     runs-on: ubuntu-latest
 
     strategy:
@@ -26,5 +25,9 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install -r requirements.txt
+    - name: Install the package
+      run: pip install -e .
     - name: Test with unittest
       run: python -m unittest
+    - name: Test CLI Command
+      run: sortgs 'machine learning' --nresults 10
diff --git a/.gitignore b/.gitignore
@@ -20,4 +20,9 @@ ipython_config.py
 
 # Other variables
 .DS_Store
-.vscode/
+.vscode/
+
+# Ignore egg-info, dist folders and build
+*.egg-info
+dist
+build
diff --git a/setup.py b/setup.py
@@ -0,0 +1,27 @@
+from setuptools import setup
+
+setup(
+    name='sortgs',
+    version='1.0.0',
+    author='Fernando Marcos Wittmann',
+    #author_email='fernando.wittmann[at]gmail[dot]com',
+    description='A Python tool to rank Google Scholar publications by citations.',
+    long_description=open('README.md', encoding='utf-8').read(),
+    long_description_content_type='text/markdown',
+    url='https://github.com/WittmannF/sort-google-scholar',
+    py_modules=['sortgs'],  # Single-module distribution: ships sortgs.py
+    install_requires=[
+        'pandas',  # imported by sortgs.py; TODO: list its remaining runtime dependencies
+    ],
+    entry_points={
+        'console_scripts': [
+            'sortgs=sortgs:main',  # Installs the `sortgs` command, which calls sortgs.main()
+        ],
+    },
+    classifiers=[
+        'Programming Language :: Python :: 3',
+        'License :: OSI Approved :: MIT License',
+        'Operating System :: OS Independent',
+    ],
+    python_requires='>=3.6',
+)
diff --git a/sortgs.py b/sortgs.py
@@ -22,6 +22,7 @@
 import pandas as pd
 from time import sleep
 import warnings
+import os
 
 # Solve conflict between raw_input and input on Python 2 and Python 3
 import sys
@@ -30,7 +31,7 @@
 # Default Parameters
 KEYWORD = 'machine learning' # Default argument if command line is empty
 NRESULTS = 100 # Fetch 100 articles
-CSVPATH = '.' # Current folder
+CSVPATH = os.getcwd() # Current folder as default path
 SAVECSV = True
 SORTBY = 'Citations'
 PLOT_RESULTS = False
@@ -53,7 +54,7 @@
 def get_command_line_args():
     # Command line arguments
     parser = argparse.ArgumentParser(description='Arguments')
-    parser.add_argument('--kw', type=str, help="""Keyword to be searched. Use double quote followed by simple quote to search for an exact keyword. Example: "'exact keyword'" """)
+    parser.add_argument('kw', type=str, help="""Keyword to be searched. Use double quote followed by simple quote to search for an exact keyword. Example: "'exact keyword'" """, default=KEYWORD)
     parser.add_argument('--sortby', type=str, help='Column to be sorted by. Default is by the columns "Citations", i.e., it will be sorted by the number of citations. If you want to sort by citations per year, use --sortby "cit/year"')
     parser.add_argument('--nresults', type=int, help='Number of articles to search on Google Scholar. Default is 100. (carefull with robot checking if value is too high)')
     parser.add_argument('--csvpath', type=str, help='Path to save the exported csv file. By default it is the current folder')
@@ -66,6 +67,11 @@ def get_command_line_args():
     # Parse and read arguments and assign them to variables if exists
     args, _ = parser.parse_known_args()
 
+    # Check if no arguments were provided and print help if so
+    if len(sys.argv) == 1:
+        parser.print_help()
+        sys.exit(0)
+
     keyword = KEYWORD
     if args.kw:
         keyword = args.kw
@@ -180,6 +186,9 @@ def main():
     # Get command line arguments
     keyword, number_of_results, save_database, path, sortby_column, plot_results, start_year, end_year, debug = get_command_line_args()
 
+    print("Running with the following parameters:")
+    print(f"Keyword: {keyword}, Number of results: {number_of_results}, Save database: {save_database}, Path: {path}, Sort by: {sortby_column}, Plot results: {plot_results}, Start year: {start_year}, End year: {end_year}, Debug: {debug}")
+
     # Create main URL based on command line arguments
     if start_year:
         GSCHOLAR_MAIN_URL = GSCHOLAR_URL + STARTYEAR_URL.format(start_year)
@@ -308,6 +317,7 @@ def main():
         fpath_csv = os.path.join(path,keyword.replace(' ','_')+'.csv')
         fpath_csv = fpath_csv[:MAX_CSV_FNAME]
         data_ranked.to_csv(fpath_csv, encoding='utf-8')
+        print('Results saved to', fpath_csv)
 
 if __name__ == '__main__':
         main()
diff --git a/test/test_sortgs.py b/test/test_sortgs.py
@@ -16,6 +16,16 @@ def setUpClass(self):
 
         os.system("python sortgs.py --debug --kw 'machine learning' --nresults 20 --endyear 2022 --sortby 'cit/year'")
         self.df_top_sorted_cit_per_year=pd.read_csv('machine_learning.csv')
+
+        # Repeat the above, but testing the cli command
+        os.system("sortgs 'machine learning' --debug --nresults 10 --endyear 2022")
+        self.df_top_10_cli=pd.read_csv('machine_learning.csv')
+
+        os.system("sortgs 'machine learning' --debug --nresults 20 --endyear 2022")
+        self.df_top_20_cli=pd.read_csv('machine_learning.csv')
+
+        os.system("sortgs 'machine learning' --debug --nresults 20 --endyear 2022 --sortby 'cit/year'")
+        self.df_top_sorted_cit_per_year_cli=pd.read_csv('machine_learning.csv')
 
     def test_get_10_results(self):
         self.assertEqual(len(self.df_top_10), 10)
@@ -47,6 +57,34 @@ def test_cit_per_year_sorted(self):
     def test_csv_exists(self):
         os.system("python sortgs.py --debug --kw 'machine learning' --nresults 10")
         self.assertTrue(os.path.exists('machine_learning.csv'))
+
+    def test_cli_get_10_results(self):
+        self.assertEqual(len(self.df_top_10_cli.index), 10)
+
+    def test_cli_get_20_results(self):
+        self.assertEqual(len(self.df_top_20_cli.index), 20)
+
+    def test_cli_is_sorted(self):
+        # First five citation counts of the 20-result CLI run, in CSV row order.
+        leading = list(self.df_top_20_cli['Citations'].values[:5])
+        self.assertEqual(leading, [49230, 8603, 3166, 3069, 2853])
+
+    def test_cli_top_result(self):
+        # Author, citations and truncated cit/year of the first CLI result row.
+        frame = self.df_top_20_cli
+        observed = [str(frame['Author'].values[0]),
+                    int(frame['Citations'].values[0]),
+                    int(frame['cit/year'].values[0])]
+        self.assertEqual(observed, [' Bishop', 49230, 2896])
+
+    def test_cli_cit_per_year_sorted(self):
+        # Compare the first five rows of the CLI run made with --sortby 'cit/year'.
+        head = self.df_top_sorted_cit_per_year_cli.head(5)
+        observed = [list(head['Citations'].values),
+                    list(head['cit/year'].values)]
+        self.assertEqual(observed, [[49230, 8603, 2853, 3166, 2416],
+                                    [2896, 782, 571, 352, 302]])
+
 
 
 if __name__=='__main__':