Commit: Add action config

gingerhot committed Oct 1, 2022
1 parent ebd6dd0 commit b06aaca
Showing 6 changed files with 275 additions and 79 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/test-gene-rss.yml
@@ -0,0 +1,21 @@
name: Test RSS generation

on: [push]

jobs:
  gene-rss:
    runs-on: ubuntu-latest
    name: A job to test RSS generation
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Test Rssify
        uses: ./
        with:
          title: Test Rssify
          url: https://gitx.io/
          item-title-css: .post-title > a
          item-url-css: .post-title > a
          item-description-css: .cover-text
          item-date-css: .post-time
          item-date-format: '%Y-%m-%d'
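The `item-*-css` inputs in this workflow are CSS selectors that the action feeds to BeautifulSoup's `select`. A minimal sketch of what a selector like `.post-title > a` matches; the sample HTML is made up for illustration and is not taken from gitx.io:

```python
from bs4 import BeautifulSoup

# Hypothetical markup resembling what the selectors above target
html = """
<div class="post-title"><a href="/posts/hello">Hello RSS</a></div>
<div class="cover-text">A short summary.</div>
<div class="post-time">2022-10-01</div>
"""

soup = BeautifulSoup(html, "html.parser")
item_title = soup.select(".post-title > a")[0]  # item-title-css / item-url-css
item_date = soup.select(".post-time")[0].text   # item-date-css

print(item_title.text, item_title.get("href"), item_date)
# Hello RSS /posts/hello 2022-10-01
```

Because `item-url-css` points at the same `<a>` element as `item-title-css`, one selector can yield both the entry title (`.text`) and its link (`.get("href")`).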
133 changes: 132 additions & 1 deletion .gitignore
@@ -1 +1,132 @@
*.xml
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# this .python-version is for test
!test/other/versions/.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
22 changes: 2 additions & 20 deletions README.md
@@ -1,21 +1,3 @@
-I looked at a few online services that provide custom-created RSS feeds
-for websites that don't have one. None of them were free of charge
-without being too limited in functionality for me.
-
-So I hacked together this very simple rssify.py script.
-It reads the websites you want to rssify from a config file.
-It could be easily extended with more features if needed.
-For now it only parses the title and date via CSS selectors and generates
-a feed.xml file which can be imported into newsboat/newsbeuter or, I guess,
-any other RSS reader.
-
-```config.ini
-[Jodel Engineering Blog]
-url = https://jodel.com/engineering/
-item_title = .post-title > a
-item_date = .post-date
-item_date_format = %%b %%d, %%Y
-item_timezone = Europe/Berlin
-```
-
-The script runs once daily in a cronjob on my local machine.
+You can generate an RSS feed for a blog or website that doesn't have one.
+
+See the [test workflow](https://github.com/tabhub/rssify/blob/master/.github/workflows/test-gene-rss.yml) for an example.
70 changes: 70 additions & 0 deletions action.yml
@@ -0,0 +1,70 @@
name: 'RSSify Any'
description: 'Generate an RSS feed for any blog/website'
inputs:
  title:
    description: 'RSS title'
    required: true
  subtitle:
    description: 'RSS subtitle'
    required: false
  url:
    description: 'Website URL'
    required: true
  item-title-css:
    description: 'CSS selector to get the item title'
    required: true
  item-url-css:
    description: 'CSS selector to get the item URL'
    required: true
  item-description-css:
    description: 'CSS selector to get the item description'
    required: false
  item-date-css:
    description: 'CSS selector to get the item post date'
    required: false
  item-date-format:
    description: 'Date format of the item post date, used to parse it'
    required: false
  item-timezone:
    description: 'Timezone applied to the item post date'
    required: false
    default: 'Asia/Shanghai'
runs:
  using: "composite"
  steps:
    - name: Install Python
      uses: actions/setup-python@v4
      with:
        python-version: '3.10'
    - name: Install Dependencies
      run: pip install requests feedgen bs4 pytz
      shell: bash
    - name: Pass Inputs to Shell
      run: |
        echo "TITLE=${{ inputs.title }}" >> $GITHUB_ENV
        echo "SUBTITLE=${{ inputs.subtitle }}" >> $GITHUB_ENV
        echo "URL=${{ inputs.url }}" >> $GITHUB_ENV
        echo "ITEM_TITLE_CSS=${{ inputs.item-title-css }}" >> $GITHUB_ENV
        echo "ITEM_URL_CSS=${{ inputs.item-url-css }}" >> $GITHUB_ENV
        echo "ITEM_DESCRIPTION_CSS=${{ inputs.item-description-css }}" >> $GITHUB_ENV
        echo "ITEM_DATE_CSS=${{ inputs.item-date-css }}" >> $GITHUB_ENV
        echo "ITEM_DATE_FORMAT=${{ inputs.item-date-format }}" >> $GITHUB_ENV
        echo "ITEM_TIMEZONE=${{ inputs.item-timezone }}" >> $GITHUB_ENV
      shell: bash
    - name: Generate RSS files
      run: python ./rssify.py
      shell: bash
    - name: Store RSS files in the repo
      run: |
        # git commit if there's any change
        if test -n "$(git status --porcelain 2>/dev/null)"; then
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add .
          git commit -m "Update RSS files"
          git push origin ${GITHUB_REF##*/}
        fi
      shell: bash
branding:
  icon: 'rss'
  color: 'green'
20 changes: 0 additions & 20 deletions config.ini

This file was deleted.

88 changes: 50 additions & 38 deletions rssify.py
@@ -1,46 +1,58 @@
-import configparser
+import os
+from urllib.parse import urljoin
 from datetime import datetime
 import requests
 from feedgen.feed import FeedGenerator
 from bs4 import BeautifulSoup
 from pytz import timezone
 
-config = configparser.ConfigParser()
-config.read('config.ini')
+title = os.environ.get('TITLE')
+subtitle = os.environ.get('SUBTITLE')
+url = os.environ.get('URL')
+item_title_selector = os.environ.get('ITEM_TITLE_CSS')
+item_url_selector = os.environ.get('ITEM_URL_CSS')
+item_description_selector = os.environ.get('ITEM_DESCRIPTION_CSS')
+item_date_selector = os.environ.get('ITEM_DATE_CSS')
+item_date_format = os.environ.get('ITEM_DATE_FORMAT')
+item_timezone = os.environ.get('ITEM_TIMEZONE')
 
-for section in config.sections():
-    s = dict(config.items(section))
-    r = requests.get(s['url'])
-    soup = BeautifulSoup(r.text, 'lxml')
-    titles = soup.select(s['item_title'])
-    urls = soup.select(s['item_url'])
+r = requests.get(url)
+soup = BeautifulSoup(r.text, 'lxml')
+titles = soup.select(item_title_selector)
+urls = soup.select(item_url_selector)
+descriptions = soup.select(item_description_selector)
 
-    if 'item_date' in s:
-        dates = soup.select(s['item_date'])
-    else:
-        dates = None
+if item_date_selector:
+    dates = soup.select(item_date_selector)
+else:
+    dates = None
 
-    fg = FeedGenerator()
-    fg.title(section)
-    fg.description(section)
-    fg.link(href=s['url'], rel='alternate')
+fg = FeedGenerator()
+fg.title(title)
+if subtitle:
+    fg.subtitle(subtitle)
+else:
+    fg.subtitle('Generated by TabHub Rssify(https://tabhub.github.io/)')
+fg.link(href='https://tabhub.github.io/', rel='alternate')
 
-    for i in range(len(titles)):
-        if i > len(urls) - 1:
-            break
+for i in range(len(titles)):
+    if i > len(urls) - 1:
+        break
 
-        fe = fg.add_entry()
-        fe.title(titles[i].text)
-        fe.link(href=urls[i].get('href'), rel='alternate')
-        if dates is not None:
-            date = datetime.strptime(dates[i].text.strip(), s['item_date_format'])
-            if config.has_option(section, 'item_timezone'):
-                localtz = timezone(s['item_timezone'])
-                date = localtz.localize(date)
-        else:
-            #date = datetime.now(timezone("Europe/Berlin"))
-            date = '1970-01-01 00:00:00+02:00'
+    fe = fg.add_entry()
+    fe.title(titles[i].text)
+    fe.link(href=urljoin(url, urls[i].get('href')), rel='alternate')
+    if descriptions[i]:
+        fe.description(descriptions[i].text)
+    if dates is not None and item_date_format:
+        date = datetime.strptime(dates[i].text.strip(), item_date_format)
+        if item_timezone:
+            localtz = timezone(item_timezone)
+            date = localtz.localize(date)
+    else:
+        #date = datetime.now(timezone("Europe/Berlin"))
+        date = '1970-01-01 00:00:00+02:00'
 
-        fe.published(date)
+    fe.published(date)
 
-    fg.rss_file(section.replace(' ', '_') + '.xml')
+fg.rss_file('rss.xml')
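The date handling in rssify.py boils down to `datetime.strptime` with `item-date-format` followed by `pytz` localization. A minimal sketch of that step, using an illustrative date string, the `'%Y-%m-%d'` format from the test workflow, and the action's default timezone `'Asia/Shanghai'`:

```python
from datetime import datetime
from pytz import timezone

# Parse the scraped date text into a naive datetime
date = datetime.strptime("2022-10-01", "%Y-%m-%d")

# Attach the configured timezone's UTC offset (pytz-style localization)
date = timezone("Asia/Shanghai").localize(date)

print(date.isoformat())  # 2022-10-01T00:00:00+08:00
```

Note that pytz requires `localize()` rather than passing `tzinfo=` to the datetime constructor; the latter can pick a historical local-mean-time offset.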
