From 5d85ed095b1278fa4a920c4b63fd55cd89b390a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz?= Date: Sun, 9 Oct 2022 18:21:37 +0200 Subject: [PATCH 1/5] Added 5 Artist properties --- README.md | 6 ++++ pyproject.toml | 4 +-- src/enmet/enmet.py | 83 ++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 89 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 46dcc26..df83b4f 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,12 @@ Note: any optional parameters in constructors that provide values related to an - Attributes and properties: - `id: str` - identifier - `name(self) -> str` + - `real_full_name(self) -> str` + - `age(self) -> str` + - `place_of_birth(self) -> str` + - `gender(self) -> str` + - `biography(self) -> Optional[str]` + - `trivia(self) -> Optional[str]` - `Band(EnmetEntity)`. This class represents a band. - `__init__(self, id_: str, *, name: str = None, country: Countries = None)`. `id_` is the band's identifier in Metal Archives. `name` is the band's name as stated on the band's page. `country` is the band's country of origin. - Attributes and properties: diff --git a/pyproject.toml b/pyproject.toml index 2014d94..d7728a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "enmet" -version = "0.1.0" +version = "0.2.0a" description = "Python API for Encyclopaedia Metallum (The Metal Archives) website." readme = {text = """ Enmet is a programmatic API to Encyclopaedia Metallum - The Metal Archives site. It allows convenient access to Metal Archives data from python code. 
@@ -42,7 +42,7 @@ classifiers = [ [project.optional-dependencies] test = [ "pytest", - "pytest-cov" + "pytest-mock" ] [project.urls] diff --git a/src/enmet/enmet.py b/src/enmet/enmet.py index dab7ee7..3c668f6 100644 --- a/src/enmet/enmet.py +++ b/src/enmet/enmet.py @@ -9,7 +9,7 @@ from os.path import expandvars, expanduser from pathlib import PurePath, Path from time import sleep -from typing import List, Optional, Tuple, Union, Iterable +from typing import List, Optional, Tuple, Union, Iterable, Type from urllib.parse import urljoin, urlparse from weakref import WeakValueDictionary @@ -414,7 +414,7 @@ def disc_names(self) -> List[Optional[str]]: return [e.text for e in self.enmet.select(".discRow td")] or [None] @cached_property - def total_times(self): + def total_times(self) -> List[Optional[str]]: return [e.text for e in self.enmet.select(".table_lyrics strong")] or [None] @cached_property @@ -439,6 +439,65 @@ def name(self): def real_full_name(self): return self._get_header_item("Real/full name:").text.strip() + @cached_property + def age(self) -> str: + return self._get_header_item("Age:").text.strip() + + @cached_property + def place_of_birth(self) -> str: + return self._get_header_item("Place of birth:").text.strip() + + @cached_property + def gender(self) -> str: + return self._get_header_item("Gender:").text + + def _get_extended_section(self, caption: str, cls_data_source: Type[_DataPage]) -> Optional[str]: + # This is a mess because the HTML for this section is a mess... 
+ if top := self.enmet.select_one("#member_content .band_comment"): + if caption_elem := top.find("h2", string=caption): + idx_caption = top.index(caption_elem) + has_readme = False + idx=0 + for idx, elem in enumerate(top.contents[idx_caption+1:]): + if not isinstance(elem, Tag): + continue + elif elem.text == "Read more": + has_readme = True + break + elif elem.name == "h2": + break + else: + idx += 1 + if has_readme: + return getattr(cls_data_source(self.id), caption.lower()) + else: + return " ".join([e.text for e in top.contents[idx_caption+1:idx_caption+1+idx]]) + return None + + @cached_property + def biography(self) -> Optional[str]: + return self._get_extended_section("Biography", _ArtistBiographyPage) + + @cached_property + def trivia(self) -> Optional[str]: + return self._get_extended_section("Trivia", _ArtistTriviaPage) + + +class _ArtistBiographyPage(_DataPage): + RESOURCE = "artist/read-more/id/{}" + + @cached_property + def biography(self) -> str: + return self.enmet.text + + +class _ArtistTriviaPage(_DataPage): + RESOURCE = "artist/read-more/id/{}/field/trivia" + + @cached_property + def trivia(self) -> str: + return self.enmet.text + class _LyricsPage(_DataPage): RESOURCE = "release/ajax-view-lyrics/id/{}" @@ -711,6 +770,26 @@ def name(self) -> str: def real_full_name(self) -> str: return self._artist_page.real_full_name + @cached_property + def age(self) -> str: + return self._artist_page.age + + @cached_property + def place_of_birth(self) -> str: + return self._artist_page.place_of_birth + + @cached_property + def gender(self) -> str: + return self._artist_page.gender + + @cached_property + def biography(self) -> str: + return self._artist_page.biography + + @cached_property + def trivia(self) -> str: + return self._artist_page.trivia + class EntityArtist(DynamicEnmetEntity, ABC): """"Album artist or lineup artist""" From 75c6383624a7aa38c032c4100c8d76b17090948c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz?= Date: Sun, 9 Oct 2022 23:41:41 
+0200 Subject: [PATCH 2/5] Added similar_artists to Band --- README.md | 8 ++++++++ src/enmet/enmet.py | 48 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index df83b4f..fa79a9e 100644 --- a/README.md +++ b/README.md @@ -141,6 +141,7 @@ Note: any optional parameters in constructors that provide values related to an - `label(self) -> str` (current or last known) - `lineup(self) -> List["LineupArtist"]` (current or last known) - `discography(self) -> List["Album"]` + - `similar_artists(self) -> List["SimilarBand"]` (Note: There is a naming inconsistency here on the Metal Archives page - this list refers to bands, not artists, i.e. persons. Property name follows Metal Archives wording, but otherwise the notion of "band" is used.) - `Disc(DynamicEnmetEntity)`. This class represents a disc of an album. More precisely, it is a container which holds some or all tracks of the album. Except for a CD, it can be in fact a physical cassette, VHS, DVD or even arbitrary partition in case of electronic releases - whatever Metal Archives considers a "disc". - `__init__(self, album_id: str, number: int = 0, bands: List[Band] = None)`. `album_id` is id of an album the disc belongs to. `number` is ordinal number of the disc on the album (counted from 0). `bands` is a list of bands that perform tracks on the disc. - Attributes and properties: @@ -159,6 +160,13 @@ Note: any optional parameters in constructors that provide values related to an - `band: Band` - the band object - `role: str` - a role that artist has in the lineup. - all remaining attributes and properties are identical as for `Artist`. +- `SimilarBand(DynamicEnmetEntity)`. This class represents a band in _Similar artists_ tab on another band's page. + - `__init__(self, id_: str, similar_to_id: str, score: str, name: str = None, country: str = None, genres: str = None)`. `id_` is the band's identifier. 
`similar_to_id` is the id of a band which the given band is similar to. `score` is similarity score (number of user votes). `name` is the band's name. `country` is the band's country. `genres` is the band's genres. + - Attributes and properties: + - `band: Band` - the band object + - `similar_to: Band` - the band that the given band is similar to + - `score: int` - similarity score. + - all remaining attributes and properties are identical as for `Band`. - `Track(EnmetEntity)`. This class represents a track on an album. It's a bit different than the other EnmetEntity classes, as tracks don't have their own resources (pages) in Metal Archives. - `__init__(self, id_, bands: List[Band], number: int = None, name: str = None, time: timedelta = None, lyrics_info: Optional[bool] = None)`. `id_` a track's identifier. `bands` is a list of bands performing on the `Disc` which the track belongs to. `number` is the track's number on the disc (counter from 1). `name` is the track's name. `time` is the track's duration. `lyrics_info` is lyrics availability status (`None` if there is no information, `True` if a link to the lyrics is available, `False` it the track is marked as _instrumental_). 
- Attributes and properties: diff --git a/src/enmet/enmet.py b/src/enmet/enmet.py index 3c668f6..2f8f4a0 100644 --- a/src/enmet/enmet.py +++ b/src/enmet/enmet.py @@ -223,9 +223,10 @@ def set_session(self, **kwargs) -> CachedSession: return session @lru_cache(maxsize=_BS_CACHE_SIZE) - def _cached_get(self, resource: str) -> BeautifulSoup: + def _cached_get(self, resource: str, params: Optional[Tuple[Tuple]]) -> BeautifulSoup: """Get page from Metal Archives with caching.""" response = self._session.get(urljoin(_METALLUM_URL, resource), + params=params, headers={"User-Agent": _USER_AGENT, 'Accept-Encoding': 'gzip'} ) response.raise_for_status() @@ -239,7 +240,8 @@ def __get__(self, instance, owner) -> Union[BeautifulSoup, "_CachedSite"]: self._CACHE_PATH.mkdir(parents=True, exist_ok=True) self.set_session() resource = instance.RESOURCE.format(instance.id) - return self._cached_get(resource) + params = getattr(instance, "PARAMS", None) + return self._cached_get(resource, params) class _DataPage(_Page, _CachedInstance, ABC): @@ -338,6 +340,23 @@ def lineup(self) -> List[List[Optional[str]]]: return result +class _BandRecommendationsPage(_DataPage): + RESOURCE = "band/ajax-recommendations/id/{}?showMoreSimilar=1" + PARAMS = (("showMoreSimilar", 1),) + + @cached_property + def similar_artists(self) -> List[List[str]]: + rows = self.enmet.select("#artist_list tr:not(:last-child)") + results = [] + for row in rows: + data = row.select("td") + results.append([data[0].select_one("a")["href"], data[0].text]) # Band URL, band name + results[-1].append(data[1].text) # Country + results[-1].append(data[2].text) # Genre + results[-1].append(data[3].text) # Score + return results + + class _AlbumPage(_DataPage): RESOURCE = "albums/_/_/{}" @@ -550,13 +569,15 @@ def __str__(self): class Band(EnmetEntity): """Band or artist performing as a band.""" - def __init__(self, id_: str, *, name: str = None, country: str = None): + def __init__(self, id_: str, *, name: str = None, country: 
str = None, genres: str = None): if not hasattr(self, "id"): super().__init__(id_) if name is not None: setattr(self, "name", name) if country is not None: setattr(self, "country", Countries[country_to_enum_name(country)]) + if genres is not None: + setattr(self, "genres", genres) self._band_page = _BandPage(self.id) self._albums_page = _DiscographyPage(self.id) @@ -609,6 +630,27 @@ def discography(self) -> List["Album"]: """List of band's albums in chronological order.""" return [Album(_url_to_id(a[0]), name=a[1], year=a[3]) for a in self._albums_page.albums] + @cached_property + def similar_artists(self) -> List["SimilarBand"]: + return [SimilarBand(_url_to_id(sa[0]), self.id, sa[4], name=sa[1], country=sa[2], genres=sa[3]) + for sa in _BandRecommendationsPage(self.id).similar_artists] + + +class SimilarBand(DynamicEnmetEntity): + def __init__(self, id_: str, similar_to_id: str, score: str, name: str = None, country: str = None, genres: str = None): + self.band = Band(id_, name=name, country=country, genres=genres) + self.similar_to = Band(similar_to_id) + self.score = int(score) + + def __dir__(self) -> List[str]: + return dir(self.band) + ["score", "similar_to"] + + def __getattr__(self, item): + return getattr(self.band, item) + + def __repr__(self): + return f"<{self.__class__.__name__}: {self.band.name} ({self.score})>" + class Album(EnmetEntity): def __init__(self, id_: str, *, name: str = None, year: int = None): From f63ba366df3b71ded8637a080a7b4c5a7c18da79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz?= Date: Mon, 10 Oct 2022 21:08:55 +0200 Subject: [PATCH 3/5] Tests + stripping strings in Artist's trivia/biography --- src/enmet/enmet.py | 4 ++-- test/test_enmet.py | 52 +++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 49 insertions(+), 7 deletions(-) diff --git a/src/enmet/enmet.py b/src/enmet/enmet.py index 2f8f4a0..3dcb7d3 100644 --- a/src/enmet/enmet.py +++ b/src/enmet/enmet.py @@ -488,9 +488,9 @@ def _get_extended_section(self, 
caption: str, cls_data_source: Type[_DataPage]) else: idx += 1 if has_readme: - return getattr(cls_data_source(self.id), caption.lower()) + return getattr(cls_data_source(self.id), caption.lower()).strip() else: - return " ".join([e.text for e in top.contents[idx_caption+1:idx_caption+1+idx]]) + return " ".join([e.text.strip() for e in top.contents[idx_caption+1:idx_caption+1+idx]]) return None @cached_property diff --git a/test/test_enmet.py b/test/test_enmet.py index 8fa44c9..918ee76 100644 --- a/test/test_enmet.py +++ b/test/test_enmet.py @@ -36,10 +36,35 @@ def test_band(): assert Artist(184) is band.lineup[0].artist assert band.lineup[0].artist.real_full_name == "David Scott Mustaine" assert str(band.lineup[0].artist) == "Dave Mustaine" + assert all(x in dir(band.lineup[0]) for x in ["name_in_lineup", "band"]) assert band.discography[0].release_date == PartialDate(year=1984, month="March", day=9) - assert dir(band) == ['country', 'discography', 'formed_in', 'genres', 'label', 'lineup', 'location', - 'lyrical_themes', 'name', 'status', 'years_active'] - assert dir(band.lineup[0]) == ['band', 'name', 'name_in_lineup', 'real_full_name', 'role'] + assert set(dir(band)) == {'country', 'discography', 'formed_in', 'genres', 'label', 'lineup', 'location', + 'lyrical_themes', 'name', 'similar_artists', 'status', 'years_active'} + assert len(band.similar_artists) > 180 + assert band.similar_artists[0].score > 490 + assert band.similar_artists[0].similar_to is band + assert band.similar_artists[0].name == "Metallica" + assert "Metallica" in repr(band.similar_artists[0]) + + +def test_artist(): + # given + a = Artist(184) + # then + assert "1961" in a.age + assert a.place_of_birth == 'United States (La Mesa, California)' + assert a.gender == "Male" + assert a.biography.startswith("Mustaine was born in La Mesa") + assert a.trivia.startswith("Dave performed alongside Dream Theater") + assert set(dir(a)) == {'age', 'biography', 'gender', 'name', 'place_of_birth', 
'real_full_name', 'trivia'} + + +def test_artist_less_extras(): + # given + a = Artist(14883) + # then + assert a.trivia.startswith("DiSanto was arrested") + assert a.biography is None def test_band_splitup(): @@ -56,6 +81,13 @@ def test_search_bands_set_country(mocker): assert asp_mock.mock_calls[0] == call({'bandName': 'dummy', 'country[]': ['PL']}) +def test_search_bands_no_params(): + # when + bands = search_bands() + # then + assert bands == [] + + def test_album(): album = search_albums(name="Metal Heart", band="Accept")[0] assert {a.id for a in album.lineup} == {'21647', '21529', '21621', '21592', '21656'} @@ -73,12 +105,13 @@ def test_album(): assert album.year == 1985 assert dir(album) == ['bands', 'catalog_id', 'discs', 'format', 'label', 'lineup', 'name', 'release_date', 'reviews', 'total_time', 'type', 'year'] - assert dir(album.lineup[0]) == ['album', 'name', 'name_on_album', 'real_full_name', 'role'] + assert set(dir(album.lineup[0])) == {'album', 'name', 'name_on_album', 'real_full_name', 'role', 'age', 'biography', + 'gender', 'place_of_birth', 'trivia'} assert dir(album.discs[0]) == ['name', 'number', 'total_time', 'tracks'] assert dir(album.discs[0].tracks[0]) == ['band', 'lyrics', 'name', 'number', 'time'] -def test_search_album_with_years(mocker): +def test_search_albums_with_years(mocker): # given asp_mock = mocker.patch("src.enmet.enmet._AlbumSearchPage") # when @@ -87,8 +120,17 @@ def test_search_album_with_years(mocker): assert asp_mock.mock_calls[0] == call({'releaseTitle': 'dummy', 'releaseYearFrom': 1991, 'releaseYearTo': 1992, 'releaseType[]': []}) +def test_search_albums_no_params(): + # when + albums = search_albums() + # then + assert albums == [] + + def test_album_missing_values(): + # given album = Album("3509") + # then assert album.name == "World War III" assert album.discs[0].tracks[2].name == "Vindicator" assert album.discs[0].tracks[2].time is None From 58daa73bb4528635690f853bfac212f6ecb70456 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=C5=81ukasz?= Date: Mon, 10 Oct 2022 21:50:32 +0200 Subject: [PATCH 4/5] Add SimilarBand to __all__, PEP8 --- src/enmet/enmet.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/enmet/enmet.py b/src/enmet/enmet.py index 3dcb7d3..c7b359e 100644 --- a/src/enmet/enmet.py +++ b/src/enmet/enmet.py @@ -23,13 +23,14 @@ _logger = logging.getLogger(APPNAME) -__all__ = ["PartialDate", "ReleaseTypes", "set_session_cache", "Entity", "ExternalEntity", "EnmetEntity", "DynamicEnmetEntity", - "Band", "Album", "Disc", "Track", "Artist", "EntityArtist", "LineupArtist", "AlbumArtist", "search_bands", - "search_albums"] +__all__ = ["PartialDate", "ReleaseTypes", "set_session_cache", "Entity", "ExternalEntity", "EnmetEntity", + "DynamicEnmetEntity", "Band", "Album", "Disc", "Track", "Artist", "EntityArtist", "LineupArtist", + "AlbumArtist", "search_bands", "search_albums", "SimilarBand"] _METALLUM_URL = "https://www.metal-archives.com" # Without correct user-agent there are 4xx responses -_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.167 Safari/537.36" +_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)" \ + "Chrome/102.0.5005.167 Safari/537.36" def _url_to_id(url: str) -> str: @@ -64,7 +65,8 @@ def __repr__(self): return f"<{self.__class__.__name__}: year={self.year}, month={self.month}, day={self.day}>" def __str__(self): - return f"{self.year}{'' if self.month is None else '-{:02}'.format(self.month)}{'' if self.day is None else '-{:02}'.format(self.day)}" + return f"{self.year}{'' if self.month is None else '-{:02}'.format(self.month)}" \ + f"{'' if self.day is None else '-{:02}'.format(self.day)}" def __eq__(self, other): return self.year == other.year and self.month == other.month and self.day == other.day @@ -216,7 +218,8 @@ def __init__(self): def set_session(self, **kwargs) -> CachedSession: """Factory method for 
CachedSession with delay hook.""" - session = CachedSession(**({"cache_name": str(self._CACHE_PATH / self._CACHE_NAME), "backend": "sqlite"} | kwargs)) + session = CachedSession( + **({"cache_name": str(self._CACHE_PATH / self._CACHE_NAME), "backend": "sqlite"} | kwargs)) session.hooks['response'].append( lambda r, *args, **kwargs: None if not getattr(r, "from_cache", False) and sleep(1 / _CachedSite.QUERY_RATE) else None) self._session = session @@ -476,7 +479,7 @@ def _get_extended_section(self, caption: str, cls_data_source: Type[_DataPage]) if caption_elem := top.find("h2", string=caption): idx_caption = top.index(caption_elem) has_readme = False - idx=0 + idx = 0 for idx, elem in enumerate(top.contents[idx_caption+1:]): if not isinstance(elem, Tag): continue @@ -637,7 +640,8 @@ def similar_artists(self) -> List["SimilarBand"]: class SimilarBand(DynamicEnmetEntity): - def __init__(self, id_: str, similar_to_id: str, score: str, name: str = None, country: str = None, genres: str = None): + def __init__(self, id_: str, similar_to_id: str, score: str, name: str = None, country: str = None, + genres: str = None): self.band = Band(id_, name=name, country=country, genres=genres) self.similar_to = Band(similar_to_id) self.score = int(score) From 4ddbed138c4625b1f27020cb396b66564e976164 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz?= Date: Mon, 10 Oct 2022 22:02:04 +0200 Subject: [PATCH 5/5] Version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d7728a0..46e1df1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "enmet" -version = "0.2.0a" +version = "0.2.0" description = "Python API for Encyclopaedia Metallum (The Metal Archives) website." readme = {text = """ Enmet is a programmatic API to Encyclopaedia Metallum - The Metal Archives site. It allows convenient access to Metal Archives data from python code.