Skip to content

Commit

Permalink
ci: Update tests and add Python 3.13 to workflow (#3)
Browse files Browse the repository at this point in the history
* test: Remove unit test dependency on external URL

* docs: Add workflow status to README.md

* ci: Update workflows and dependencies, add uv lock file

* ci: Update workflow for uv
  • Loading branch information
essteer authored Oct 17, 2024
1 parent 9477e06 commit 4475ca5
Show file tree
Hide file tree
Showing 8 changed files with 349 additions and 84 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ruff.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: ruff
name: Formatting
on:
# Trigger workflow on main branch push or pull request
push:
Expand All @@ -15,5 +15,5 @@ jobs:
- uses: chartboost/ruff-action@v1
with:
# Ruff version - sync with .pre-commit-config.yaml
version: "0.4.10"
version: "0.6.9"
args: check --fix --exit-non-zero-on-fix --show-fixes
22 changes: 10 additions & 12 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: test
name: Tests

on:
push:
Expand All @@ -12,24 +12,22 @@ jobs:
test:
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ['3.10', 3.11, 3.12]
os: [ubuntu-latest]
python-version: ['3.10', 3.11, 3.12, 3.13]

runs-on: ${{ matrix.os }}

steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
- name: Install the latest version of uv
uses: astral-sh/setup-uv@v3
with:
python-version: ${{ matrix.python-version }}
version: "latest"

- name: Install dependencies
run: |
pip install -r requirements.txt
- name: Set up Python
run: uv python install ${{ matrix.python-version }}

- name: Run tests
run: |
python -m unittest discover
- name: Install dependencies and run tests
run: uv run python3 -m unittest discover
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version - sync with .github/workflows/ruff.yaml
rev: 'v0.4.10'
rev: 'v0.6.9'
hooks:
# Run the linter.
- id: ruff
Expand Down
18 changes: 12 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,23 @@
# gigsgo-pipeline
<h1 align="center" id="title">Gigsgo Pipeline</h1>

ETL pipeline for gigs data
<p align="center">
<a href="https://github.com/essteer/gigsgo-pipeline/actions/workflows/test.yaml"><img src="https://github.com/essteer/gigsgo-pipeline/actions/workflows/test.yaml/badge.svg"></a>
<a href="https://pypi.org/project/gigsgo-pipeline/"><img src="https://img.shields.io/badge/Python-3.10_~_3.13-3776AB.svg?style=flat&logo=Python&logoColor=white"></a>
<a href="https://snyk.io/test/github/essteer/gigsgo-pipeline"><img src="https://snyk.io/test/github/essteer/gigsgo-pipeline/badge.svg?name=Snyk&style=flat&logo=Snyk"></a>
</p>

<p align="center">
An ETL pipeline for live music listings.
</p>

## Operation

```console
$ source .venv/bin/activate
$ python3 -m src.main -v 'https://www.example.com'
$ uv run python3 -m src.main -v 'https://www.example.com'
```

## Tests

```console
$ source .venv/bin/activate
$ python3 -m unittest discover -s tests
$ uv run python3 -m unittest discover -s tests
```
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[project]
name = "gigsgo-pipeline"
dynamic = ["version"]
description = "An ETL pipeline for gig data"
description = "An ETL pipeline for live music listings"
authors = [{ name = "Elliott Steer", email = "essteer@pm.me" }]
dependencies = [
"beautifulsoup4==4.12.3",
Expand All @@ -22,12 +22,13 @@ classifiers = [
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
]

[project.optional-dependencies]
dev = [
"pre-commit==3.7.1",
"ruff>=0.4.10",
"ruff==0.6.9",
]

[project.urls]
Expand Down
18 changes: 0 additions & 18 deletions requirements.txt

This file was deleted.

43 changes: 0 additions & 43 deletions tests/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

TEST_ASSETS = os.path.abspath(os.path.join("tests", "assets"))
TEST_CASES = [HTML_1, HTML_2, HTML_3, HTML_4]
TEST_URL = "https://undergroundhk.com/gig-guide-22-august-5-september-2024/"


class TestDataPipeline(unittest.TestCase):
Expand All @@ -23,19 +22,13 @@ def test_all_matches_persist(self):
self.assertEqual(
sum(1 for _ in data_pipeline(test_case, False)), num_matches[test_case]
)
# NOTE: this test relies on an external source; check the source if it is not found
self.assertEqual(sum(1 for _ in data_pipeline(TEST_URL)), 109)

def test_matches_are_dicts(self):
"""Test all matches in dict format"""
for test_case in TEST_CASES:
matches = data_pipeline(test_case, False)
for match in matches:
self.assertIsInstance(match, dict)
# NOTE: this test relies on an external source; check the source if it is not found
matches = data_pipeline(TEST_URL)
for match in matches:
self.assertIsInstance(match, dict)

def test_expected_fields_present(self):
"""Test expected fields present"""
Expand All @@ -55,22 +48,6 @@ def test_expected_fields_present(self):
self.assertIn("address_cn", match)
else:
self.assertIn("address_raw", match)
# NOTE: this test relies on an external source; check the source if it is not found
matches = data_pipeline(TEST_URL)
for match in matches:
self.assertIn("weekday", match)
self.assertIn("month", match)
self.assertIn("date", match)
self.assertIn("desc", match)
self.assertIn("open", match)
self.assertIn("close", match)
self.assertIn("bands", match)
self.assertIn("tickets", match)
if "venue" in match:
self.assertIn("address_en", match)
self.assertIn("address_cn", match)
else:
self.assertIn("address_raw", match)

def test_match_content_types_correct(self):
"""Test values in each match dict are of expected types"""
Expand All @@ -94,26 +71,6 @@ def test_match_content_types_correct(self):
self.assertIsInstance(match["address_cn"], str)
else:
self.assertIsInstance(match["address_raw"], str)
# NOTE: this test relies on an external source; check the source if it is not found
matches = data_pipeline(TEST_URL)
for match in matches:
self.assertIsInstance(match["_id"], str)
self.assertIsInstance(match["datestring"], str)
self.assertIsInstance(match["weekday"], str)
self.assertIsInstance(match["day"], int)
self.assertIsInstance(match["month"], int)
self.assertIsInstance(match["date"], int)
self.assertIsInstance(match["desc"], str)
self.assertIsInstance(match["open"], str)
self.assertIsInstance(match["close"], str)
self.assertIsInstance(match["bands"], list)
self.assertIsInstance(match["tickets"], dict)
if "venue" in match:
self.assertIsInstance(match["venue"], str)
self.assertIsInstance(match["address_en"], str)
self.assertIsInstance(match["address_cn"], str)
else:
self.assertIsInstance(match["address_raw"], str)


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 4475ca5

Please sign in to comment.