diff --git a/README.md b/README.md index 2e0f057..35c37c3 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,8 @@ The cache by default is located in `%LOCALAPPDATA%\.enmet` or `~/.enmet` directo In order to control caching, you can both obtain the default cache object (for example to clean up old entries) and set your own cache. If you use your own cache, you need to set it each time you use _Enmet_, as there is no persistent configuration for it. The function to manipulate the cache is [`set_session_cache`](#functions). +Web requests fetching images are not cached. + There is no feature to disable session caching. ### Object caching @@ -113,6 +115,8 @@ Note: Any "empty" values are returned as `None` or `[]`. This refers both to val - `additional_notes(self) -> str` - `last_modified(self) -> datetime` (time of the last modification of the album's page) - `other_versions(self) -> List["Album"]` + - Methods: + - `def get_image(self) -> Tuple[str, str, bytes]` - album image: original file name, MIME type, binary data - `AlbumArtist(_EntityArtist)`. This class represent an artist performing on a specific album. - `__init__(self, id_: str, album_id: str, *, name: str = None, role: str = None)`. `id_` is the artist's identifier in Metal Archives. `album_id` is an album's identifier. `name` is the artist's name as stated on the album. `role` is the artist's role on the album. - Attributes and properties: @@ -137,6 +141,8 @@ Note: Any "empty" values are returned as `None` or `[]`. This refers both to val - `misc_staff(self) -> Dict[Union[Band, ExternalEntity], List[Album]]` - `links(self) -> List[Tuple[str, str]]` - `last_modified(self) -> datetime` (time of the last modification of the artist's page) + - Methods: + - `def get_image(self) -> Tuple[str, str, bytes]` - artist image: original file name, MIME type, binary data - `Band(EnmetEntity)`. This class represents a band. - `__init__(self, id_: str, *, name: str = None, country: Countries = None)`. `id_` is the band's identifier in Metal Archives. `name` is the band's name as stated on the band's page. `country` is the band's country of origin. - Attributes and properties: @@ -162,6 +168,9 @@ Note: Any "empty" values are returned as `None` or `[]`. This refers both to val - `links_unofficial(self) -> List[Tuple[str, str]]` (returns list or tuples- url, page name) - `links_labels(self) -> List[Tuple[str, str]]` (returns list or tuples- url, page name) - `links_tabulatures(self) -> List[Tuple[str, str]]` (returns list or tuples- url, page name) + - Methods: + - `def get_band_image(self) -> Tuple[str, str, bytes]` - band image: original file name, MIME type, binary data + - `def get_logo_image(self) -> Tuple[str, str, bytes]` - logo image: original file name, MIME type, binary data - `Disc(DynamicEnmetEntity)`. This class represents a disc of an album. More precisely, it is a container which holds some or all tracks of the album. Except for a CD, it can be in fact a physical cassette, VHS, DVD or even arbitrary partition in case of electronic releases - whatever Metal Archives considers a "disc". - `__init__(self, album_id: str, number: int = 0, bands: List[Band] = None)`. `album_id` is id of an album the disc belongs to. `number` is ordinal number of the disc on the album (counted from 0). `bands` is a list of bands that perform tracks on the disc. - Attributes and properties: diff --git a/pyproject.toml b/pyproject.toml index b15e7f7..d04322a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "enmet" -version = "0.6.0" +version = "0.7.0" description = "Python API for Encyclopaedia Metallum (The Metal Archives) website." readme = {text = """ Enmet is a programmatic API to Encyclopaedia Metallum - The Metal Archives site. It allows convenient access to Metal Archives data from python code. diff --git a/src/enmet/common.py b/src/enmet/common.py index ea71320..93fdfae 100644 --- a/src/enmet/common.py +++ b/src/enmet/common.py @@ -1,6 +1,8 @@ import logging +from abc import abstractmethod, ABC from enum import Enum from pathlib import PurePath +from typing import Tuple, Type from urllib.parse import urlparse from weakref import WeakValueDictionary @@ -60,21 +62,26 @@ def datestr_to_date(date_string: str) -> PartialDate: return PartialDate(year=int(year[0])) -class CachedInstance: +class CachedInstance(ABC): """Mixin to reuse existing objects.""" _CACHE = WeakValueDictionary() def __new__(cls, *args, **kwargs): - hash_ = cls.hash(*args, **kwargs) - if obj := CachedInstance._CACHE.get((cls.__name__, hash_)): + hash_ = cls.hash(cls, *args, **kwargs) + if obj := CachedInstance._CACHE.get(hash_): _logger.debug(f"cached get {cls.__name__} {hash_}") return obj else: _logger.debug(f"uncached get {cls.__name__} {hash_}") obj = super().__new__(cls) - CachedInstance._CACHE[(cls.__name__, hash_)] = obj + CachedInstance._CACHE[hash_] = obj return obj + @staticmethod + @abstractmethod + def hash(cls: Type, *args, **kwargs) -> int: + """Pseudo-hash to use in __new__.""" + class ReleaseTypes(Enum): """Names for release types.""" diff --git a/src/enmet/entities.py b/src/enmet/entities.py index 6214230..e8b734c 100644 --- a/src/enmet/entities.py +++ b/src/enmet/entities.py @@ -1,8 +1,12 @@ import re -from abc import ABC, abstractmethod +from abc import ABC from datetime import datetime, timedelta from functools import cached_property, reduce from inspect import getmembers +from itertools import chain +from urllib.parse import urlparse + +import requests from typing import List, Iterable, Optional, Tuple, Union, Dict from .common import CachedInstance, ReleaseTypes, url_to_id, datestr_to_date, PartialDate, BandStatuses @@ -50,7 +54,16 @@ def _turn_na_into_none(data: Union[str, List, timedelta]) -> Union[List, None, s return data -class Entity(ABC, CachedInstance): +def _get_image(url: str) -> Tuple[str, str, bytes]: + """Returns image file name, mime type/subtype and bytes""" + response = requests.get(url) + type = response.headers["Content-Type"] + name = urlparse(response.url).path.split("/")[-1] + data = response.content + return name, type, data + + +class Entity(CachedInstance, ABC): """A thing, like band or album""" def __repr__(self): return f"<{self.__class__.__name__}: {self.name}>" @@ -58,17 +71,14 @@ def __repr__(self): def __dir__(self) -> List[str]: return [p[0] for p in getmembers(self.__class__) if type(p[1]) is cached_property] - @staticmethod - @abstractmethod - def hash(*args, **kwargs) -> Tuple: - """Pseudo-hash for use in CachedInstance.__new__ to determine whether to use cache.""" + def __eq__(self, other): + return hash(self) == hash(other) class ExternalEntity(Entity): """ Non EM entity, like non-metal musician in metal album lineup. - It has only string representation and is a class just for the - sake of consistency. + Construction requires some string ("name" - actual object value) + accepts any extra attributes. """ def __init__(self, name: str, **kwargs): if not hasattr(self, "name"): @@ -79,15 +89,13 @@ def __init__(self, name: str, **kwargs): def __dir__(self) -> Iterable[str]: return vars(self) - def __eq__(self, other): - return hash(self) == hash(other) - def __hash__(self): - return hash(tuple(vars(self).values())) + # There is a potential issue here if attributes are added to instance after initialization. + return self.hash(self.__class__, vars(self).values()) @staticmethod - def hash(*args, **kwargs) -> Tuple: - return tuple(sorted(args) + sorted(kwargs.values())) + def hash(cls, *args, **kwargs) -> int: + return hash((cls, tuple(sorted(str(val) for val in chain(args, kwargs.values()))))) class EnmetEntity(Entity, ABC): @@ -99,9 +107,13 @@ def __init__(self, id_): def __repr__(self): return f"<{self.__class__.__name__}: {self.name} ({self.id})>" + def __hash__(self): + return self.hash(self.__class__, self.id) + @staticmethod - def hash(*args, **kwargs) -> Tuple: - return args[0], + def hash(cls, *args, **kwargs) -> int: + # Assuming entities of different types cannot have the same id - ??? + return hash((cls, args[0])) class DynamicEnmetEntity(Entity, ABC): @@ -219,9 +231,15 @@ def links_labels(self) -> List[Tuple[str, str]]: def links_tabulatures(self) -> List[Tuple[str, str]]: return self._links_page.links_tabulatures + def get_logo_image(self) -> Tuple[str, str, bytes]: + return _get_image(self._band_page.logo_image_link) + + def get_band_image(self) -> Tuple[str, str, bytes]: + return _get_image(self._band_page.band_image_link) + class SimilarBand(DynamicEnmetEntity): - def __init__(self, id_: str, similar_to_id: str, score: str, name: str = None, country: str = None, + def __init__(self, id_: str, similar_to_id: str, /, score: str, name: str = None, country: str = None, genres: str = None): if not "band" in self.__dict__: self.band = Band(id_, name=name, country=country, genres=genres) @@ -237,13 +255,16 @@ def __getattr__(self, item): def __repr__(self): return f"<{self.__class__.__name__}: {self.band.name} ({self.score})>" + def __hash__(self): + return self.hash(self.__class__, self.band.id, self.similar_to.id) + @staticmethod - def hash(*args, **kwargs) -> Tuple: - return args[0], args[1] + def hash(cls, *args, **kwargs) -> int: + return hash((cls, args[0], args[1])) class Album(EnmetEntity): - def __init__(self, id_: str, *, name: str = None, year: int = None): + def __init__(self, id_: str, /, *, name: str = None, year: int = None): # Have parameters for str and repr ready if not hasattr(self, "id"): super().__init__(id_) @@ -333,9 +354,12 @@ def other_versions(self) -> List["Album"]: data = AlbumVersionsPage(self.id).other_versions return [Album(url_to_id(item[0])) for item in data] + def get_image(self) -> Tuple[str, str, bytes]: + return _get_image(self._album_page.image_link) + class Disc(DynamicEnmetEntity): - def __init__(self, album_id: str, number: int = 0, bands: List[Band] = None): + def __init__(self, album_id: str, number: int = 0, /, bands: List[Band] = None): if not hasattr(self, "_number"): self._number = number self._album_page = AlbumPage(album_id) @@ -360,9 +384,12 @@ def tracks(self) -> List["Track"]: tracks.append(Track(t[0], self._bands, int(t[1]), t[2], _timestr_to_time(t[3]), t[4])) return tracks + def __hash__(self): + return self.hash(self.__class__, self._album_page.id, self._number) + @staticmethod - def hash(*args, **kwargs) -> Tuple: - return args[0], args[1] + def hash(cls, *args, **kwargs) -> int: + return hash((cls, args[0], args[1])) class Track(EnmetEntity): @@ -492,11 +519,14 @@ def last_modified(self) -> datetime: data = self._artist_page.last_modified return _timestamp_to_time(data) + def get_image(self) -> Tuple[str, str, bytes]: + return _get_image(self._artist_page.image_link) + class EntityArtist(DynamicEnmetEntity, ABC): """"Album artist or lineup artist""" - def __init__(self, id_, role: str = None): + def __init__(self, id_, role: str = None, /): if not "artist" in self.__dict__: self.artist = Artist(id_) self.role = role @@ -507,9 +537,12 @@ def __getattr__(self, item): def __dir__(self) -> List[str]: return dir(self.artist) + ["role"] + def __hash__(self): + return self.hash(self.__class__, self.artist.id, self.role) + @staticmethod - def hash(*args, **kwargs) -> Tuple: - return args[0], args[1] + def hash(cls, *args, **kwargs) -> int: + return hash((cls, args[0], args[1])) class LineupArtist(EntityArtist): diff --git a/src/enmet/pages.py b/src/enmet/pages.py index 689e8cb..b417694 100644 --- a/src/enmet/pages.py +++ b/src/enmet/pages.py @@ -135,6 +135,7 @@ def __get__(self, instance, owner) -> Union[BeautifulSoup, "_CachedSite"]: class _DataPage(_Page, CachedInstance, ABC): + """Abstract page of data (response to a data request)""" enmet = _CachedSite() @@ -149,9 +150,15 @@ def _get_header_item(self, name: str) -> Optional[Tag]: def set_session_cache(**kwargs) -> CachedSession: return _DataPage.enmet.set_session(**kwargs) + def __eq__(self, other): + return hash(self) == hash(other) + + def __hash__(self): + return self.hash(self.__class__, self.id) + @staticmethod - def hash(*args, **kwargs) -> Tuple: - return args[0], + def hash(cls, *args, **kwargs) -> int: + return hash((cls, args[0])) class DiscographyPage(_DataPage): @@ -208,7 +215,7 @@ def genres(self) -> List[str]: @cached_property def lyrical_themes(self) -> List[str]: - return _split_by_sep(self._get_header_item("Lyrical themes:").text.strip()) + return _split_by_sep(self._get_header_item("Themes:").text.strip()) @cached_property def current_label(self): @@ -254,6 +261,14 @@ def info(self) -> str: def last_modified(self) -> str: return self.enmet.find("td", string=re.compile("Last modified on")).text + @cached_property + def logo_image_link(self) -> Optional[str]: + return (link := self.enmet.select(".band_name_img img")) and link[0]["src"] + + @cached_property + def band_image_link(self) -> Optional[str]: + return (link := self.enmet.select(".band_img img")) and link[0]["src"] + class _BandInfoPage(_DataPage): RESOURCE = "band/read-more/id/{}" @@ -267,31 +282,36 @@ class BandLinksPage(_DataPage): RESOURCE = "link/ajax-list/type/band/id/{}" def _get_links(self, kind: str) -> List[Tuple[str, str]]: - data = self.enmet.select_one(f"#{kind}") - if data is None: - return [] - else: - return [(item["href"], item.text) for item in data.select("a")] + result = [] + data = self.enmet.select(f"#{kind} ~ tr") + if data is not None: + for row in data: + if row["id"].startswith("header_"): + break + else: + cell = row.select_one("a") + result.append((cell["href"], cell.text)) + return result @cached_property def links_official(self) -> List[Tuple[str, str]]: - return self._get_links("band_links_Official") + return self._get_links("header_Official") @cached_property def links_official_merchandise(self) -> List[Tuple[str, str]]: - return self._get_links("band_links_Official_merchandise") + return self._get_links("header_Official_merchandise") @cached_property def links_unofficial(self) -> List[Tuple[str, str]]: - return self._get_links("band_links_Unofficial") + return self._get_links("header_Unofficial") @cached_property def links_labels(self) -> List[Tuple[str, str]]: - return self._get_links("band_links_Labels") + return self._get_links("header_Labels") @cached_property def links_tabulatures(self) -> List[Tuple[str, str]]: - return self._get_links("band_links_Tablatures") + return self._get_links("header_Tablatures") class BandRecommendationsPage(_DataPage): @@ -420,6 +440,10 @@ def additional_notes(self) -> str: def last_modified(self) -> str: return self.enmet.find("td", string=re.compile("Last modified on")).text + @cached_property + def image_link(self) -> Optional[str]: + return (link := self.enmet.select(".album_img img")) and link[0]["src"] + class AlbumVersionsPage(_DataPage): RESOURCE = "release/ajax-versions/current/{}/parent/{}" @@ -544,6 +568,10 @@ def misc_staff(self) -> Dict[Tuple[str, ...], List[Tuple[str, ...]]]: def last_modified(self) -> str: return self.enmet.find("td", string=re.compile("Last modified on")).text + @cached_property + def image_link(self) -> Optional[str]: + return (link := self.enmet.select(".member_img img")) and link[0]["src"] + class _ArtistBiographyPage(_DataPage): diff --git a/test/test_enmet.py b/test/test_enmet.py index 6cc000b..6ca57e1 100644 --- a/test/test_enmet.py +++ b/test/test_enmet.py @@ -65,6 +65,14 @@ def test_band(): assert len(band.links_labels) == 0 assert len(band.links_official_merchandise) > 5 assert len(band.links_official) > 5 + img_file, img_type, img_data = band.get_band_image() + assert isinstance(img_file, str) + assert img_type.startswith("image/") + assert isinstance(img_data, bytes) + img_file, img_type, img_data = band.get_logo_image() + assert isinstance(img_file, str) + assert img_type.startswith("image/") + assert isinstance(img_data, bytes) def test_band_no_formed_in_no_biography(): @@ -85,7 +93,7 @@ def test_band_no_similar_artists(): def test_band_links_labels(): # then - assert len(Band(11949).links_labels) > 1 + assert len(Band("11949").links_labels) > 1 def test_artist(): @@ -102,11 +110,16 @@ def test_artist(): assert list(a.active_bands.keys()) == [Band("138")] assert set(a.past_bands) == {Band("3540464105"), Band("4984"), Band("125"), Band("3540461857"), ExternalEntity("Fallen Angels", role="Vocals, Guitars (1983)"), ExternalEntity("Panic", role="Guitars (?-1981)")} - assert set(a.guest_session) == {Band("401"), Band("37"), Band("706"), Band("343")} + assert set(a.guest_session) == {Band("401"), Band("37"), Band("706"), Band("343"), Band("59")} assert set(a.misc_staff) == {Band("138"), Band("4984"), Band("125"), Band("3540461857"), Band("401"), Band("343"), Band("25"), Band("1831")} assert len(a.links) == 10 assert isinstance(a.last_modified, datetime) + img_file, img_type, img_data = a.get_image() + assert isinstance(img_file, str) + assert img_type.startswith("image/") + assert isinstance(img_data, bytes) + def test_artist_two_extended_sections_first_no_read_more(): @@ -185,6 +198,10 @@ def test_album(): assert str(album.lineup[0]) == "Udo Dirkschneider" assert len(album.other_versions) > 20 assert isinstance(album.last_modified, datetime) + img_file, img_type, img_data = album.get_image() + assert isinstance(img_file, str) + assert img_type.startswith("image/") + assert isinstance(img_data, bytes) def test_search_albums_with_years(mocker):