# Scrape latest data #1098
# Workflow file for this run

# Workflow name and triggers: runs on a daily schedule and on manual dispatch.
name: Scrape latest data
on:
  # Daily at 02:14; GitHub evaluates scheduled cron expressions in UTC.
  schedule:
    - cron: '14 2 * * *'
  # Allow the workflow to be started by hand (no inputs are defined).
  workflow_dispatch: {}
jobs:
  # Single job: run both scrapers, regenerate the diff file, and propose the
  # resulting changes under data/ as a pull request.
  scheduled:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up Ruby
        uses: ruby/setup-ruby@v1
        with:
          # Quoted so the version is always a string; an unquoted value such
          # as 3.0 would be parsed as a float and lose its trailing zero.
          ruby-version: '2.6'
      - name: Install OS dependencies
        # Refresh the package index first so the install does not hit stale
        # mirrors, and pass -y so apt-get never waits on a confirmation prompt.
        run: |
          sudo apt-get update
          sudo apt-get install -y jq moreutils
      - name: Install ruby dependencies
        run: bundle install
      # NOTE(review): the piped steps below do not set `shell: bash`, so they
      # run without `pipefail`; a non-zero exit from the Ruby script is masked
      # by the exit status of the last command in the pipeline. Confirm this
      # best-effort behaviour is intended before tightening it.
      - name: Get current wikipedia list
        # ifne (from moreutils) only runs tee when the scraper produced
        # output, so an empty result does not overwrite the existing CSV.
        run: bundle exec ruby bin/scraper/wikipedia.rb | ifne tee data/wikipedia.csv
      - name: Get current wikidata list
        # Same empty-output guard as the wikipedia step.
        run: bundle exec ruby bin/scraper/wikidata.rb meta.json | ifne tee data/wikidata.csv
      - name: Generate diff
        # No ifne here: data/diff.csv is rewritten on every run, even when the
        # diff script prints nothing.
        run: bundle exec ruby bin/diff.rb | tee data/diff.csv
      - name: Create pull request
        # The id lets the final step read this action's outputs.
        id: cpr
        uses: peter-evans/create-pull-request@v3
        with:
          commit-message: Automated update
          committer: GitHub <noreply@github.com>
          author: ${{ github.actor }} <${{ github.actor }}@users.noreply.github.com>
          signoff: false
          branch: data-update
          delete-branch: true
          title: 'Current list'
          assignees: tmtmtmtm
          reviewers: tmtmtmtm
      - name: Check outputs
        # Echo the pull request number and URL reported by the cpr step.
        run: |
          echo "Pull Request Number - ${{ steps.cpr.outputs.pull-request-number }}"
          echo "Pull Request URL - ${{ steps.cpr.outputs.pull-request-url }}"