# Captured from the GitHub Actions web UI: workflow "scrape", run #249.
# Workflow file for this run:

# Scheduled scraper: runs autoscraper.py, commits whatever changed back to
# this repository, then pushes the output/ directory to a second repository.
name: scrape

on:
  # Allow manual runs.
  workflow_dispatch:
  # Midnight UTC on every third day of the month (1, 4, 7, ...); the */3 step
  # restarts at day 1 each month.
  schedule:
    - cron: '0 0 */3 * *'

jobs:
  scrape:
    runs-on: ubuntu-latest
    # The commit step below pushes with the workflow token, which needs write
    # access to repository contents.
    permissions:
      contents: write
    steps:
      - name: Check out this repo
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version stays a string rather than a float.
          python-version: '3.9'

      - name: Installed package list
        run: apt list --installed

      # Refresh the package index before touching any packages so the install
      # below does not request versions that are gone from the mirrors.
      - name: Get update
        run: sudo apt-get update

      # -y on the apt steps: the runner has no terminal to answer the
      # confirmation prompt.
      - name: Remove Chrome
        run: sudo apt-get purge -y google-chrome-stable

      - name: Remove default Chromium
        run: sudo apt-get purge -y chromium-browser

      - name: Install a new Chromium
        run: sudo apt-get install -y chromium-browser

      - name: Install all necessary packages
        run: pip install requests beautifulsoup4 pandas webdriver_manager selenium regex packaging

      - name: Run the scraping script
        run: python autoscraper.py

      - name: Commit and push if content changed
        run: |-
          git config user.name "Automated"
          git config user.email "actions@users.noreply.github.com"
          git add -A
          timestamp=$(date -u)
          # `git commit` fails when nothing is staged; end this step
          # successfully in that case instead of failing the job.
          git commit -m "Latest data: ${timestamp}" || exit 0
          git push

      - name: Pushes to another repository
        id: push_directory
        # NOTE(review): @main is a moving ref; consider pinning to a release
        # tag or commit SHA.
        uses: cpina/github-action-push-to-another-repository@main
        env:
          API_TOKEN_GITHUB: ${{ secrets.API_TOKEN_GITHUB }}
        with:
          source-directory: output/
          destination-github-username: 'pmagtulis'
          destination-repository-name: 'foi-analysis'
          target-directory: output/
          # NOTE(review): confirm this is the intended commit author address.
          user-email: carles3@pina.cat
          commit-message: See ORIGIN_COMMIT from $GITHUB_REF
          target-branch: main