Skip to content

Commit

Permalink
Merge pull request #99 from elliot-100/98-revise-api
Browse files Browse the repository at this point in the history
Revise API (BREAKING CHANGE)
  • Loading branch information
elliot-100 authored Jul 15, 2024
2 parents 08d6a2d + cc28057 commit 9728626
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 57 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ Historic and pre-release versions aren't necessarily included.

### Changed

- *BREAKING CHANGES:*
- `get_profile_info()` returns`ProfileInfo` `NamedTuple` instead of dict
- `get_manager_member_counts()` returns `ManagerMemberCounts` `NamedTuple`
instead of dict
- `club_manager_url_via_login()` renamed to `manager_url_via_login()`
- `club_profile_url()` renamed to `profile_url()`
- Only get logger once, rather than for every log message
- Docs: docstring simplification, improvements
- Dev dependencies: add pdoc
Expand Down
15 changes: 11 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ See also https://playwright.dev/python/docs/browsers#install-system-dependencies
### Get info from a club's profile

```python
britishcycling_clubs.get_profile_info(club_id="123")
from britishcycling_clubs import get_profile_info
get_profile_info(club_id="123")
```
Returns a dict with these keys and values:

Expand All @@ -56,13 +57,17 @@ It then retrieves and prints the club name and total member count.


### Construct club's profile URL

```python
britishcycling_clubs.club_profile_url(club_id="123")
from britishcycling_clubs import profile_url
profile_url(club_id="123")
```

### Get member counts from Club Manager

```python
britishcycling_clubs.get_manager_member_counts(
from britishcycling_clubs import get_manager_member_counts
get_manager_member_counts(
club_id="123",
username="USERNAME",
password="PASSWORD",
Expand All @@ -84,8 +89,10 @@ It then retrieves and prints the number of active, expired and new
club member counts from the club's Club Manager pages.

### Construct club's Club Manager URL (via login)

```python
britishcycling_clubs.club_manager_url_via_login(club_id=123)
from britishcycling_clubs import manager_url_via_login
manager_url_via_login(club_id="123")
```
Returns URL which redirects to Club Manager URL, via login if needed.

Expand Down
14 changes: 9 additions & 5 deletions britishcycling_clubs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
"""Module with functions to retrieve information about a club."""

from britishcycling_clubs.manager import (
club_manager_url_via_login,
ManagerMemberCounts,
get_manager_member_counts,
manager_url_via_login,
)
from britishcycling_clubs.profile import (
club_profile_url,
ProfileInfo,
get_profile_info,
profile_url,
)

__all__ = [
"club_manager_url_via_login",
"get_manager_member_counts",
"club_profile_url",
"profile_url",
"get_profile_info",
"ProfileInfo",
"manager_url_via_login",
"get_manager_member_counts",
"ManagerMemberCounts",
]
46 changes: 20 additions & 26 deletions britishcycling_clubs/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,33 +5,30 @@
import logging
import time
from pprint import pformat
from typing import TYPE_CHECKING, TypedDict
from typing import NamedTuple

from playwright.sync_api import sync_playwright

if TYPE_CHECKING:
from typing_extensions import TypeGuard

_MANAGER_VIA_LOGIN_BASE_URL = "https://www.britishcycling.org.uk/uac/connect?success_url=/dashboard/club/membership?club_id="


class MemberCounts(TypedDict):
"""Return type for `get_manager_member_counts()` function."""
class ManagerMemberCounts(NamedTuple):
"""Returned by `get_manager_member_counts()` function."""

active: int
"""Value from 'Active Club Members' tab."""
new: int
"""Value from 'New Club Subscriptions' tab."""
expired: int
"""Value from 'Expired Club Members' tab."""
new: int
"""Value from 'New Club Subscriptions' tab."""


def get_manager_member_counts(
club_id: str,
username: str,
password: str,
manager_page_load_delay: int = 5,
) -> MemberCounts:
) -> ManagerMemberCounts:
"""Get number of active, new, expired members from the Club Manager page.
This is a slow operation (circa 10s), so get them all in one go.
Expand Down Expand Up @@ -71,7 +68,7 @@ def get_manager_member_counts(
page = browser.new_page()

# login page
page.goto(club_manager_url_via_login(club_id))
page.goto(manager_url_via_login(club_id))
page.locator("id=username2").fill(username)
page.locator("id=password2").fill(password)
page.locator("id=login_button").click()
Expand Down Expand Up @@ -99,7 +96,7 @@ def get_manager_member_counts(
return _process_manager_member_counts(raw_member_counts)


def club_manager_url_via_login(club_id: str) -> str:
def manager_url_via_login(club_id: str) -> str:
"""Return URL of club's Club Manager page.
Parameters
Expand All @@ -110,38 +107,35 @@ def club_manager_url_via_login(club_id: str) -> str:
return f"{_MANAGER_VIA_LOGIN_BASE_URL}{club_id}/"


def _process_manager_member_counts(member_counts: dict[str, str]) -> MemberCounts:
def _process_manager_member_counts(
counts: dict[str, str],
) -> ManagerMemberCounts:
"""Process raw values.
Values are blank if there aren't any members (although they appear as zeros
during page load); convert these to 0 and ensure all are ints.
Raise exception if zero 'active members' value.
"""
processed_member_counts = {
key: int(value) if value else 0 for key, value in member_counts.items()
processed_counts = {
key: int(value) if value else 0 for key, value in counts.items()
}
# Assume an error if zero 'active' value.
# 'active' appears to be the slowest value to populate.
# 'new' will often be genuinely zero; 'expired' could be genuinely zero
if processed_member_counts["active"] == 0:
if processed_counts["active"] == 0:
error_message = (
"Active member count was zero; assuming issue with data collection. "
f"{pformat(processed_member_counts)}. "
f"{pformat(processed_counts)}. "
"Consider increasing `manager_page_load_delay`."
)
raise ValueError(error_message)

if not _is_membercounts(processed_member_counts):
raise TypeError
return processed_member_counts


def _is_membercounts(val: object) -> TypeGuard[MemberCounts]:
"""Check return type."""
if isinstance(val, dict):
return all(isinstance(v, int) for v in val.values())
return False
return ManagerMemberCounts(
active=processed_counts["active"],
expired=processed_counts["expired"],
new=processed_counts["new"],
)


def _log_info(logger: logging.Logger, message: str, start_time: float) -> None:
Expand Down
22 changes: 11 additions & 11 deletions britishcycling_clubs/profile.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Functions to get information from a club's profile page."""

from typing import TypedDict
from typing import NamedTuple

import requests
from bs4 import BeautifulSoup, Tag
Expand All @@ -9,8 +9,8 @@
_REQUESTS_TIMEOUT = 10 # For `requests` library operations


class ProfileInfo(TypedDict):
"""Return type for `get_profile_info()` function."""
class ProfileInfo(NamedTuple):
"""Returned by `get_profile_info()` function."""

club_name: str
total_members: int
Expand All @@ -33,20 +33,20 @@ def get_profile_info(club_id: str) -> ProfileInfo:
`ValueError` :
if information can't be located.
"""
profile_url = club_profile_url(club_id)
r = requests.get(profile_url, timeout=_REQUESTS_TIMEOUT)
url = profile_url(club_id)
r = requests.get(url, timeout=_REQUESTS_TIMEOUT)
r.raise_for_status()
if r.url != profile_url:
if r.url != url:
error_message = f"Redirected to unexpected URL {r.url}. Is `club_id` valid?"
raise ValueError(error_message)
profile_soup = BeautifulSoup(r.content, "html.parser")
return {
"club_name": _club_name_from_profile(profile_soup),
"total_members": _total_members_from_profile(profile_soup),
}
return ProfileInfo(
club_name=_club_name_from_profile(profile_soup),
total_members=_total_members_from_profile(profile_soup),
)


def club_profile_url(club_id: str) -> str:
def profile_url(club_id: str) -> str:
"""Return URL of club's profile page.
Parameters
Expand Down
23 changes: 16 additions & 7 deletions tests/test_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,47 @@

import pytest

from britishcycling_clubs import club_manager_url_via_login
from britishcycling_clubs import manager_url_via_login
from britishcycling_clubs.manager import _process_manager_member_counts


def test_club_manager_url_via_login__happy_path() -> None:
"""Test that correct URL is returned."""
# arrange
club_id = "000"
# act
url = manager_url_via_login(club_id)
# act
assert (
club_manager_url_via_login("000")
url
== "https://www.britishcycling.org.uk/uac/connect?success_url=/dashboard/club/membership?club_id=000/"
)


def test__process_manager_member_counts__happy_path() -> None:
"""Test that raw values are converted to ints."""
# arrange
raw_counts = {
"active": "123",
"new": "",
"expired": "67",
}
assert _process_manager_member_counts(raw_counts) == {
"active": 123,
"new": 0,
"expired": 67,
}
# act
counts = _process_manager_member_counts(raw_counts)
# assert
assert counts.active == 123
assert counts.new == 0
assert counts.expired == 67


def test__process_manager_member_counts__blank_active_count_raises_exception() -> None:
"""Test that ValueError is raised if active is blank."""
# arrange
raw_counts = {
"active": "",
"new": "8",
"expired": "67",
}
# act, assert
with pytest.raises(ValueError):
_process_manager_member_counts(raw_counts)
6 changes: 2 additions & 4 deletions tests/test_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from bs4 import BeautifulSoup

from britishcycling_clubs import club_profile_url
from britishcycling_clubs import profile_url
from britishcycling_clubs.profile import (
_club_name_from_profile,
_total_members_from_profile,
Expand All @@ -11,9 +11,7 @@

def test_club_profile_url__happy_path() -> None:
"""Test that correct URL is returned."""
assert (
club_profile_url("000") == "https://www.britishcycling.org.uk/club/profile/000/"
)
assert profile_url("000") == "https://www.britishcycling.org.uk/club/profile/000/"


# Partial extract from actual page
Expand Down

0 comments on commit 9728626

Please sign in to comment.