Skip to content

Commit

Permalink
Add project to PyPI
Browse files Browse the repository at this point in the history
- Add setup and make sortgs available via CLI
- Test CLI from setup.py
- Add CLI commands to test
- GitHub Action to publish to PyPI when merging to master
- Add bump2version to manage pip versioning
- Use build
  • Loading branch information
WittmannF committed Nov 16, 2023
1 parent e329efe commit 2dad6d4
Show file tree
Hide file tree
Showing 7 changed files with 138 additions and 4 deletions.
6 changes: 6 additions & 0 deletions .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# bump2version settings: the version currently released and how a bump is recorded.
[bumpversion]
current_version = 1.0.0
# Record each bump as a git commit and tag that commit.
commit = True
tag = True

# setup.py is the file whose version string is rewritten on each bump.
[bumpversion:file:setup.py]
45 changes: 45 additions & 0 deletions .github/workflows/deploy-to-pypi.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Builds the package on every run; bumps the patch version and uploads to PyPI
# only for pushes to master. Pull requests against dev get a build-only dry run,
# so an unmerged change can never create a tag or a release.
name: Publish Python Package to PyPI

on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - dev

# The release path pushes the version-bump commit and its tag back to the
# repository, which requires write access for the workflow token.
permissions:
  contents: write

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0 # Ensures tags are fetched

      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.x' # Use the version appropriate for your project

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install build twine bump2version
      - name: Configure Git
        if: github.event_name == 'push'
        run: |
          git config --global user.name "GitHub Actions"
          git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
      - name: Bump version and push tag
        if: github.event_name == 'push'
        run: |
          bump2version patch # or 'minor' or 'major' depending on the release type
          # Push the bump commit as well as the tag: otherwise the branch keeps
          # the old version and the next run would try to recreate the same
          # tag and re-upload the same version.
          git push origin "HEAD:${GITHUB_REF_NAME}"
          git push origin --tags
      - name: Build
        run: |
          python -m build
      - name: Publish
        if: github.event_name == 'push'
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
        run: |
          twine upload dist/*
5 changes: 4 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ on:

jobs:
build:

runs-on: ubuntu-latest

strategy:
Expand All @@ -26,5 +25,9 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: Install the package
run: pip install -e .
- name: Test with unittest
run: python -m unittest
- name: Test CLI Command
run: sortgs 'machine learning' --nresults 10
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,9 @@ ipython_config.py

# Other variables
.DS_Store
.vscode/
.vscode/

# Ignore egg-info, dist folders and build
*.egg-info
dist
build
27 changes: 27 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from setuptools import setup

# Read the README up front inside a context manager so the file handle is
# closed promptly, and decode it explicitly as UTF-8 so the build does not
# depend on the platform's locale encoding.
with open('README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

# Package metadata for the single-module `sortgs` distribution.
setup(
    name='sortgs',
    version='1.0.0',
    author='Fernando Marcos Wittmann',
    #author_email='fernando.wittmann[at]gmail[dot]com',
    description='A Python tool to rank Google Scholar publications by citations.',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/WittmannF/sort-google-scholar',
    py_modules=['sortgs'],  # Assuming your script is named sortgs.py
    install_requires=[
        # your dependencies here
        # NOTE(review): no runtime dependencies are declared; if sortgs.py
        # needs third-party packages, a plain `pip install sortgs` will produce
        # a CLI that fails on import - confirm against requirements.txt.
    ],
    entry_points={
        'console_scripts': [
            'sortgs=sortgs:main',  # This line sets up the command line tool
        ],
    },
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
    python_requires='>=3.6',
)
14 changes: 12 additions & 2 deletions sortgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import pandas as pd
from time import sleep
import warnings
import os

# Solve conflict between raw_input and input on Python 2 and Python 3
import sys
Expand All @@ -30,7 +31,7 @@
# Default Parameters
KEYWORD = 'machine learning' # Default argument if command line is empty
NRESULTS = 100 # Fetch 100 articles
CSVPATH = '.' # Current folder
CSVPATH = os.getcwd() # Current folder as default path
SAVECSV = True
SORTBY = 'Citations'
PLOT_RESULTS = False
Expand All @@ -53,7 +54,7 @@
def get_command_line_args():
# Command line arguments
parser = argparse.ArgumentParser(description='Arguments')
parser.add_argument('--kw', type=str, help="""Keyword to be searched. Use double quote followed by simple quote to search for an exact keyword. Example: "'exact keyword'" """)
parser.add_argument('kw', type=str, help="""Keyword to be searched. Use double quote followed by simple quote to search for an exact keyword. Example: "'exact keyword'" """, default=KEYWORD)
parser.add_argument('--sortby', type=str, help='Column to be sorted by. Default is by the columns "Citations", i.e., it will be sorted by the number of citations. If you want to sort by citations per year, use --sortby "cit/year"')
parser.add_argument('--nresults', type=int, help='Number of articles to search on Google Scholar. Default is 100. (carefull with robot checking if value is too high)')
parser.add_argument('--csvpath', type=str, help='Path to save the exported csv file. By default it is the current folder')
Expand All @@ -66,6 +67,11 @@ def get_command_line_args():
# Parse and read arguments and assign them to variables if exists
args, _ = parser.parse_known_args()

# Check if no arguments were provided and print help if so
if len(sys.argv) == 1:
parser.print_help()
sys.exit(0)

keyword = KEYWORD
if args.kw:
keyword = args.kw
Expand Down Expand Up @@ -180,6 +186,9 @@ def main():
# Get command line arguments
keyword, number_of_results, save_database, path, sortby_column, plot_results, start_year, end_year, debug = get_command_line_args()

print("Running with the following parameters:")
print(f"Keyword: {keyword}, Number of results: {number_of_results}, Save database: {save_database}, Path: {path}, Sort by: {sortby_column}, Plot results: {plot_results}, Start year: {start_year}, End year: {end_year}, Debug: {debug}")

# Create main URL based on command line arguments
if start_year:
GSCHOLAR_MAIN_URL = GSCHOLAR_URL + STARTYEAR_URL.format(start_year)
Expand Down Expand Up @@ -308,6 +317,7 @@ def main():
fpath_csv = os.path.join(path,keyword.replace(' ','_')+'.csv')
fpath_csv = fpath_csv[:MAX_CSV_FNAME]
data_ranked.to_csv(fpath_csv, encoding='utf-8')
print('Results saved to', fpath_csv)

if __name__ == '__main__':
main()
38 changes: 38 additions & 0 deletions test/test_sortgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,16 @@ def setUpClass(self):

os.system("python sortgs.py --debug --kw 'machine learning' --nresults 20 --endyear 2022 --sortby 'cit/year'")
self.df_top_sorted_cit_per_year=pd.read_csv('machine_learning.csv')

# Repeat the above, but testing the cli command
os.system("sortgs 'machine learning' --debug --nresults 10 --endyear 2022")
self.df_top_10_cli=pd.read_csv('machine_learning.csv')

os.system("sortgs 'machine learning' --debug --nresults 20 --endyear 2022")
self.df_top_20_cli=pd.read_csv('machine_learning.csv')

os.system("sortgs 'machine learning' --debug --nresults 20 --endyear 2022 --sortby 'cit/year'")
self.df_top_sorted_cit_per_year_cli=pd.read_csv('machine_learning.csv')

def test_get_10_results(self):
self.assertEqual(len(self.df_top_10), 10)
Expand Down Expand Up @@ -47,6 +57,34 @@ def test_cit_per_year_sorted(self):
def test_csv_exists(self):
    # Run the script once, then confirm the expected CSV file is present.
    expected_csv = 'machine_learning.csv'
    os.system("python sortgs.py --debug --kw 'machine learning' --nresults 10")
    csv_is_present = os.path.exists(expected_csv)
    self.assertTrue(csv_is_present)

def test_cli_get_10_results(self):
    # The CLI run that asked for 10 results must have produced 10 rows.
    row_count = len(self.df_top_10_cli)
    self.assertEqual(row_count, 10)

def test_cli_get_20_results(self):
    # The CLI run that asked for 20 results must have produced 20 rows.
    row_count = len(self.df_top_20_cli)
    self.assertEqual(row_count, 20)

def test_cli_is_sorted(self):
    # Check the five leading citation counts of the 20-result CLI run
    # against the expected values.
    leading_citations = list(self.df_top_20_cli['Citations'].values[:5])
    expected = [49230, 8603, 3166, 3069, 2853]
    self.assertEqual(leading_citations, expected)

def test_cli_top_result(self):
    # Check author, citation count and citations-per-year of the first row
    # of the 20-result CLI run.
    results = self.df_top_20_cli
    first_row = [
        str(results['Author'].values[0]),
        int(results['Citations'].values[0]),
        int(results['cit/year'].values[0]),
    ]
    self.assertEqual(first_row, [' Bishop', 49230, 2896])

def test_cli_cit_per_year_sorted(self):
    # Check the first five rows of the CLI run sorted by 'cit/year':
    # both the citation counts and the per-year values must match.
    ranked = self.df_top_sorted_cit_per_year_cli
    observed = [
        list(ranked['Citations'].values[:5]),
        list(ranked['cit/year'].values[:5]),
    ]
    expected = [
        [49230, 8603, 2853, 3166, 2416],
        [2896, 782, 571, 352, 302],
    ]
    self.assertEqual(observed, expected)



if __name__=='__main__':
Expand Down

0 comments on commit 2dad6d4

Please sign in to comment.