From 6ff56c6498eb4c9357f8f3ed92d22a36d687bee2 Mon Sep 17 00:00:00 2001 From: Elliot <3186037+elliot-100@users.noreply.github.com> Date: Mon, 8 Jul 2024 18:11:20 +0100 Subject: [PATCH 1/3] Return NamedTuples instead of dicts, add to public API --- CHANGELOG.md | 4 +++ britishcycling_clubs/__init__.py | 8 ++++-- britishcycling_clubs/manager.py | 42 ++++++++++++++------------------ britishcycling_clubs/profile.py | 14 +++++------ tests/test_manager.py | 21 +++++++++++----- 5 files changed, 50 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ddd4a8..c8e4181 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ Historic and pre-release versions aren't necessarily included. ### Changed +- *BREAKING CHANGES:* + - `get_profile_info()` returns`ProfileInfo` `NamedTuple` instead of dict + - `get_manager_member_counts()` returns `ManagerMemberCounts` `NamedTuple` + instead of dict - Only get logger once, rather than for every log message - Docs: docstring simplification, improvements - Dev dependencies: add pdoc diff --git a/britishcycling_clubs/__init__.py b/britishcycling_clubs/__init__.py index 6eb623d..77c848b 100644 --- a/britishcycling_clubs/__init__.py +++ b/britishcycling_clubs/__init__.py @@ -1,17 +1,21 @@ """Module with functions to retrieve information about a club.""" from britishcycling_clubs.manager import ( + ManagerMemberCounts, club_manager_url_via_login, get_manager_member_counts, ) from britishcycling_clubs.profile import ( + ProfileInfo, club_profile_url, get_profile_info, ) __all__ = [ - "club_manager_url_via_login", - "get_manager_member_counts", "club_profile_url", "get_profile_info", + "ProfileInfo", + "club_manager_url_via_login", + "get_manager_member_counts", + "ManagerMemberCounts", ] diff --git a/britishcycling_clubs/manager.py b/britishcycling_clubs/manager.py index efa0c64..2f60e74 100644 --- a/britishcycling_clubs/manager.py +++ b/britishcycling_clubs/manager.py @@ -5,25 +5,22 @@ import logging import time from pprint import pformat -from typing import TYPE_CHECKING, TypedDict +from typing import NamedTuple from playwright.sync_api import sync_playwright -if TYPE_CHECKING: - from typing_extensions import TypeGuard - _MANAGER_VIA_LOGIN_BASE_URL = "https://www.britishcycling.org.uk/uac/connect?success_url=/dashboard/club/membership?club_id=" -class MemberCounts(TypedDict): - """Return type for `get_manager_member_counts()` function.""" +class ManagerMemberCounts(NamedTuple): + """Returned by `get_manager_member_counts()` function.""" active: int """Value from 'Active Club Members' tab.""" - new: int - """Value from 'New Club Subscriptions' tab.""" expired: int """Value from 'Expired Club Members' tab.""" + new: int + """Value from 'New Club Subscriptions' tab.""" def get_manager_member_counts( @@ -31,7 +28,7 @@ def get_manager_member_counts( username: str, password: str, manager_page_load_delay: int = 5, -) -> MemberCounts: +) -> ManagerMemberCounts: """Get number of active, new, expired members from the Club Manager page. This is a slow operation (circa 10s), so get them all in one go. @@ -110,7 +107,9 @@ def club_manager_url_via_login(club_id: str) -> str: return f"{_MANAGER_VIA_LOGIN_BASE_URL}{club_id}/" -def _process_manager_member_counts(member_counts: dict[str, str]) -> MemberCounts: +def _process_manager_member_counts( + counts: dict[str, str], +) -> ManagerMemberCounts: """Process raw values. Values are blank if there aren't any members (although they appear as zeros @@ -118,30 +117,25 @@ def _process_manager_member_counts(member_counts: dict[str, str]) -> MemberCount Raise exception if zero 'active members' value. """ - processed_member_counts = { - key: int(value) if value else 0 for key, value in member_counts.items() + processed_counts = { + key: int(value) if value else 0 for key, value in counts.items() } # Assume an error if zero 'active' value. # 'active' appears to be the slowest value to populate. # 'new' will often be genuinely zero; 'expired' could be genuinely zero - if processed_member_counts["active"] == 0: + if processed_counts["active"] == 0: error_message = ( "Active member count was zero; assuming issue with data collection. " - f"{pformat(processed_member_counts)}. " + f"{pformat(processed_counts)}. " "Consider increasing `manager_page_load_delay`." ) raise ValueError(error_message) - if not _is_membercounts(processed_member_counts): - raise TypeError - return processed_member_counts - - -def _is_membercounts(val: object) -> TypeGuard[MemberCounts]: - """Check return type.""" - if isinstance(val, dict): - return all(isinstance(v, int) for v in val.values()) - return False + return ManagerMemberCounts( + active=processed_counts["active"], + expired=processed_counts["expired"], + new=processed_counts["new"], + ) def _log_info(logger: logging.Logger, message: str, start_time: float) -> None: diff --git a/britishcycling_clubs/profile.py b/britishcycling_clubs/profile.py index bd8913c..8d5e4e8 100644 --- a/britishcycling_clubs/profile.py +++ b/britishcycling_clubs/profile.py @@ -1,6 +1,6 @@ """Functions to get information from a club's profile page.""" -from typing import TypedDict +from typing import NamedTuple import requests from bs4 import BeautifulSoup, Tag @@ -9,8 +9,8 @@ _REQUESTS_TIMEOUT = 10 # For `requests` library operations -class ProfileInfo(TypedDict): - """Return type for `get_profile_info()` function.""" +class ProfileInfo(NamedTuple): + """Returned by `get_profile_info()` function.""" club_name: str total_members: int @@ -40,10 +40,10 @@ def get_profile_info(club_id: str) -> ProfileInfo: error_message = f"Redirected to unexpected URL {r.url}. Is `club_id` valid?" raise ValueError(error_message) profile_soup = BeautifulSoup(r.content, "html.parser") - return { - "club_name": _club_name_from_profile(profile_soup), - "total_members": _total_members_from_profile(profile_soup), - } + return ProfileInfo( + club_name=_club_name_from_profile(profile_soup), + total_members=_total_members_from_profile(profile_soup), + ) def club_profile_url(club_id: str) -> str: diff --git a/tests/test_manager.py b/tests/test_manager.py index 54f4c26..a88d4d5 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -8,32 +8,41 @@ def test_club_manager_url_via_login__happy_path() -> None: """Test that correct URL is returned.""" + # arrange + club_id = "000" + # act + url = club_manager_url_via_login(club_id) + # act assert ( - club_manager_url_via_login("000") + url == "https://www.britishcycling.org.uk/uac/connect?success_url=/dashboard/club/membership?club_id=000/" ) def test__process_manager_member_counts__happy_path() -> None: """Test that raw values are converted to ints.""" + # arrange raw_counts = { "active": "123", "new": "", "expired": "67", } - assert _process_manager_member_counts(raw_counts) == { - "active": 123, - "new": 0, - "expired": 67, - } + # act + counts = _process_manager_member_counts(raw_counts) + # assert + assert counts.active == 123 + assert counts.new == 0 + assert counts.expired == 67 def test__process_manager_member_counts__blank_active_count_raises_exception() -> None: """Test that ValueError is raised if active is blank.""" + # arrange raw_counts = { "active": "", "new": "8", "expired": "67", } + # act, assert with pytest.raises(ValueError): _process_manager_member_counts(raw_counts) From 39eec6fb603d56aa456bd123d1d44eb3469eb235 Mon Sep 17 00:00:00 2001 From: Elliot <3186037+elliot-100@users.noreply.github.com> Date: Sat, 13 Jul 2024 20:09:59 +0100 Subject: [PATCH 2/3] rename `club_manager_url_via_login()` -> `manager_url_via_login()`; `club_profile_url()` -> `profile_url()` --- CHANGELOG.md | 4 +++- britishcycling_clubs/__init__.py | 8 ++++---- britishcycling_clubs/manager.py | 4 ++-- britishcycling_clubs/profile.py | 8 ++++---- tests/test_manager.py | 4 ++-- tests/test_profile.py | 6 ++---- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c8e4181..6d49f39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,9 @@ Historic and pre-release versions aren't necessarily included. - *BREAKING CHANGES:* - `get_profile_info()` returns`ProfileInfo` `NamedTuple` instead of dict - `get_manager_member_counts()` returns `ManagerMemberCounts` `NamedTuple` - instead of dict + instead of dict + - `club_manager_url_via_login()` renamed to `manager_url_via_login()` + - `club_profile_url()` renamed to `profile_url()` - Only get logger once, rather than for every log message - Docs: docstring simplification, improvements - Dev dependencies: add pdoc diff --git a/britishcycling_clubs/__init__.py b/britishcycling_clubs/__init__.py index 77c848b..51b5a87 100644 --- a/britishcycling_clubs/__init__.py +++ b/britishcycling_clubs/__init__.py @@ -2,20 +2,20 @@ from britishcycling_clubs.manager import ( ManagerMemberCounts, - club_manager_url_via_login, get_manager_member_counts, + manager_url_via_login, ) from britishcycling_clubs.profile import ( ProfileInfo, - club_profile_url, get_profile_info, + profile_url, ) __all__ = [ - "club_profile_url", + "profile_url", "get_profile_info", "ProfileInfo", - "club_manager_url_via_login", + "manager_url_via_login", "get_manager_member_counts", "ManagerMemberCounts", ] diff --git a/britishcycling_clubs/manager.py b/britishcycling_clubs/manager.py index 2f60e74..c3fd27a 100644 --- a/britishcycling_clubs/manager.py +++ b/britishcycling_clubs/manager.py @@ -68,7 +68,7 @@ def get_manager_member_counts( page = browser.new_page() # login page - page.goto(club_manager_url_via_login(club_id)) + page.goto(manager_url_via_login(club_id)) page.locator("id=username2").fill(username) page.locator("id=password2").fill(password) page.locator("id=login_button").click() @@ -96,7 +96,7 @@ def get_manager_member_counts( return _process_manager_member_counts(raw_member_counts) -def club_manager_url_via_login(club_id: str) -> str: +def manager_url_via_login(club_id: str) -> str: """Return URL of club's Club Manager page. Parameters diff --git a/britishcycling_clubs/profile.py b/britishcycling_clubs/profile.py index 8d5e4e8..d4792fd 100644 --- a/britishcycling_clubs/profile.py +++ b/britishcycling_clubs/profile.py @@ -33,10 +33,10 @@ def get_profile_info(club_id: str) -> ProfileInfo: `ValueError` : if information can't be located. """ - profile_url = club_profile_url(club_id) - r = requests.get(profile_url, timeout=_REQUESTS_TIMEOUT) + url = profile_url(club_id) + r = requests.get(url, timeout=_REQUESTS_TIMEOUT) r.raise_for_status() - if r.url != profile_url: + if r.url != url: error_message = f"Redirected to unexpected URL {r.url}. Is `club_id` valid?" raise ValueError(error_message) profile_soup = BeautifulSoup(r.content, "html.parser") @@ -46,7 +46,7 @@ def get_profile_info(club_id: str) -> ProfileInfo: ) -def club_profile_url(club_id: str) -> str: +def profile_url(club_id: str) -> str: """Return URL of club's profile page. Parameters diff --git a/tests/test_manager.py b/tests/test_manager.py index a88d4d5..16e4f6f 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -2,7 +2,7 @@ import pytest -from britishcycling_clubs import club_manager_url_via_login +from britishcycling_clubs import manager_url_via_login from britishcycling_clubs.manager import _process_manager_member_counts @@ -11,7 +11,7 @@ def test_club_manager_url_via_login__happy_path() -> None: # arrange club_id = "000" # act - url = club_manager_url_via_login(club_id) + url = manager_url_via_login(club_id) # act assert ( url diff --git a/tests/test_profile.py b/tests/test_profile.py index cce2efd..06e0906 100644 --- a/tests/test_profile.py +++ b/tests/test_profile.py @@ -2,7 +2,7 @@ from bs4 import BeautifulSoup -from britishcycling_clubs import club_profile_url +from britishcycling_clubs import profile_url from britishcycling_clubs.profile import ( _club_name_from_profile, _total_members_from_profile, @@ -11,9 +11,7 @@ def test_club_profile_url__happy_path() -> None: """Test that correct URL is returned.""" - assert ( - club_profile_url("000") == "https://www.britishcycling.org.uk/club/profile/000/" - ) + assert profile_url("000") == "https://www.britishcycling.org.uk/club/profile/000/" # Partial extract from actual page From cc2805783cb9de38d00eb2c4e8a9ea5d815fa9a3 Mon Sep 17 00:00:00 2001 From: Elliot <3186037+elliot-100@users.noreply.github.com> Date: Mon, 8 Jul 2024 18:46:44 +0100 Subject: [PATCH 3/3] Update README --- README.md | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 66a059b..23e08a8 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,8 @@ See also https://playwright.dev/python/docs/browsers#install-system-dependencies ### Get info from a club's profile ```python -britishcycling_clubs.get_profile_info(club_id="123") +from britishcycling_clubs import get_profile_info +get_profile_info(club_id="123") ``` Returns a dict with these keys and values: @@ -56,13 +57,17 @@ It then retrieves and prints the club name and total member count. ### Construct club's profile URL + ```python -britishcycling_clubs.club_profile_url(club_id="123") +from britishcycling_clubs import profile_url +profile_url(club_id="123") ``` ### Get member counts from Club Manager + ```python -britishcycling_clubs.get_manager_member_counts( +from britishcycling_clubs import get_manager_member_counts +get_manager_member_counts( club_id="123", username="USERNAME", password="PASSWORD", @@ -84,8 +89,10 @@ It then retrieves and prints the number of active, expired and new club member counts from the club's Club Manager pages. ### Construct club's Club Manager URL (via login) + ```python -britishcycling_clubs.club_manager_url_via_login(club_id=123) +from britishcycling_clubs import manager_url_via_login +manager_url_via_login(club_id="123") ``` Returns URL which redirects to Club Manager URL, via login if needed.