From 430b71e2a032221070f8cc31327791786c477280 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz?= Date: Mon, 6 Nov 2023 22:56:57 +0100 Subject: [PATCH 1/6] Add song search, rework Track class --- pyproject.toml | 2 +- src/enmet/common.py | 1 + src/enmet/entities.py | 78 +++++++++++++++++++++++++++---------------- src/enmet/pages.py | 38 +++++++++++++++++---- src/enmet/search.py | 32 ++++++++++++++++-- test/test_enmet.py | 8 ++--- 6 files changed, 118 insertions(+), 41 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2ff9fda..772f84f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "enmet" -version = "0.8.0" +version = "0.9.0b" description = "Python API for Encyclopaedia Metallum (The Metal Archives) website." readme = {text = """ Enmet is a programmatic API to Encyclopaedia Metallum - The Metal Archives site. It allows convenient access to Metal Archives data from python code. diff --git a/src/enmet/common.py b/src/enmet/common.py index 93fdfae..489c21d 100644 --- a/src/enmet/common.py +++ b/src/enmet/common.py @@ -81,6 +81,7 @@ def __new__(cls, *args, **kwargs): @abstractmethod def hash(cls: Type, *args, **kwargs) -> int: """Pseudo-hash to use in __new__.""" + return NotImplementedError class ReleaseTypes(Enum): diff --git a/src/enmet/entities.py b/src/enmet/entities.py index e8b734c..ad54488 100644 --- a/src/enmet/entities.py +++ b/src/enmet/entities.py @@ -1,7 +1,7 @@ import re from abc import ABC from datetime import datetime, timedelta -from functools import cached_property, reduce +from functools import cached_property, reduce, cache from inspect import getmembers from itertools import chain from urllib.parse import urlparse @@ -118,7 +118,9 @@ def hash(cls, *args, **kwargs) -> int: class DynamicEnmetEntity(Entity, ABC): """Represents entity without its own identity in Enmet, for example disc of an album""" - + @staticmethod + def hash(cls, *args, **kwargs) -> int: + return hash((cls, args[0], args[1])) class Band(EnmetEntity): """Band or artist performing as a band.""" @@ -258,10 +260,6 @@ def __repr__(self): def __hash__(self): return self.hash(self.__class__, self.band.id, self.similar_to.id) - @staticmethod - def hash(cls, *args, **kwargs) -> int: - return hash((cls, args[0], args[1])) - class Album(EnmetEntity): def __init__(self, id_: str, /, *, name: str = None, year: int = None): @@ -352,7 +350,7 @@ def last_modified(self) -> datetime: @cached_property def other_versions(self) -> List["Album"]: data = AlbumVersionsPage(self.id).other_versions - return [Album(url_to_id(item[0])) for item in data] + return [Album(url_to_id(item[0]), name=self.name, year=self.year) for item in data] def get_image(self) -> Tuple[str, str, bytes]: return _get_image(self._album_page.image_link) @@ -381,30 +379,46 @@ def total_time(self) -> timedelta: def tracks(self) -> List["Track"]: tracks = [] for t in self._album_page.tracks[self._number]: - tracks.append(Track(t[0], self._bands, int(t[1]), t[2], _timestr_to_time(t[3]), t[4])) + tracks.append(Track(t[0], t[2], self._bands, int(t[1]), _timestr_to_time(t[3]), t[4], self._album_page.id)) return tracks def __hash__(self): return self.hash(self.__class__, self._album_page.id, self._number) - @staticmethod - def hash(cls, *args, **kwargs) -> int: - return hash((cls, args[0], args[1])) - class Track(EnmetEntity): - def __init__(self, id_: str, bands: List[Band], number: int, name: str, time: timedelta = None, - lyrics_info: Optional[bool] = None): + def __init__(self, id_: str, name: str, bands: List[Band], number: int = None, + time: timedelta = None, lyrics_info: bool = ..., album_id: str = None): if not hasattr(self, "id"): super().__init__(id_) - self.number = number - self.time = time + self._number = number + self._time = time self._name = name self._lyrics_info = lyrics_info self._bands = bands + self._album_id = album_id - def __dir__(self) -> List[str]: - return super().__dir__() + ["number", "time"] + def __dir__(self): + return ["name", "number", "time", "band", "lyrics", "album"] + + @cache + def _parse_album_page(self) -> None: + track = [tr for disc in AlbumPage(self._album_id).tracks for tr in disc if tr[2].endswith(self.name)][0] + self._number = int(track[1]) + self._time = _timestr_to_time(track[3]) + self._lyrics_info = track[4] + + @cached_property + def number(self) -> int: + if self._number is None: + self._parse_album_page() + return self._number + + @cached_property + def time(self) -> timedelta: + if self._time is None: + self._parse_album_page() + return self._time @cached_property def name(self) -> str: @@ -430,13 +444,24 @@ def band(self) -> Band: @cached_property def lyrics(self) -> Optional[Union[bool, str]]: - if self._lyrics_info is None: - return None # No information - elif self._lyrics_info is False: # Instrumental - return False - else: - return LyricsPage(self.id).lyrics - + match self._lyrics_info: + case None: + return None # No information + case False: # Instrumental + return False + case _: + info = LyricsPage(self.id).lyrics + match info: + case "(lyrics not available)": + return None + case "(Instrumental)": + return False + case _: + return info + + @cached_property + def album(self) -> Album: + return Album(self._album_id) class Artist(EnmetEntity): """General artist info""" @@ -540,9 +565,6 @@ def __dir__(self) -> List[str]: def __hash__(self): return self.hash(self.__class__, self.artist.id, self.role) - @staticmethod - def hash(cls, *args, **kwargs) -> int: - return hash((cls, args[0], args[1])) class LineupArtist(EntityArtist): diff --git a/src/enmet/pages.py b/src/enmet/pages.py index 5ecca23..ab591df 100644 --- a/src/enmet/pages.py +++ b/src/enmet/pages.py @@ -93,6 +93,32 @@ def albums(self) -> List[Tuple[str, ...]]: return result +class SongSearchPage(_SearchResultsPage): + RESOURCE = "search/ajax-advanced/searching/songs/" + + @cached_property + def songs(self) -> List: + records = self._fetch_search_result() + result = [] + for item in records: + bs = BeautifulSoup(item[0], features="html.parser") + try: + band_link = bs.select_one("a")["href"] + except TypeError: # Song for a band not in MA + band_link = None + band = bs.select_one("span").text + else: + band = bs.select_one("a").text + bs = BeautifulSoup(item[1], features="html.parser") + album_link, album = bs.select_one("a")["href"], bs.select_one("a").text + release_type = item[2] + name = item[3] + bs = BeautifulSoup(item[4], features="html.parser") + id_ = bs.select_one("a")["id"].split("_")[1] + result.append((album_link, album, band_link, band, release_type, name, id_)) + return result + + class _CachedSite: """Virtual The Metal Archives site. Descriptor getting data from Metal Archives site for supported classes.""" _CACHE_PATH = Path(expandvars("%LOCALAPPDATA%") if sys.platform == "win32" else expanduser("~")) / ".enmet" @@ -391,22 +417,22 @@ def tracks(self) -> List[List[Union[int, str, Optional[bool]]]]: if len(result[0]) != 0: # Another disc result.append([]) continue - # Id + # Lyrics id - 0 result[-1].append([elem.select_one("td:nth-of-type(1) a")["name"]]) - # Number + # Number - 1 number = elem.select_one("td:nth-of-type(1)").text result[-1][-1].append(number[:number.index(".")]) - # Name + # Name - 2 result[-1][-1].append(elem.select_one("td:nth-of-type(2)").text.strip()) - # Time + # Time - 3 result[-1][-1].append(elem.select_one("td:nth-of-type(3)").text) - # Lyrics status + # Lyrics status - 4 lyrics = elem.select_one("td:nth-of-type(4)") if lyrics.select_one("a"): # Has lyrics result[-1][-1].append(True) elif lyrics.select_one("em"): # Marked as instrumental result[-1][-1].append(False) - else: + else: # Unknown result[-1][-1].append(None) return result diff --git a/src/enmet/search.py b/src/enmet/search.py index 3f7a675..09c83c4 100644 --- a/src/enmet/search.py +++ b/src/enmet/search.py @@ -2,8 +2,8 @@ from enmet import Countries, country_to_enum_name from enmet.common import ReleaseTypes, url_to_id, datestr_to_date -from enmet.entities import Band, Album -from enmet.pages import BandSearchPage, AlbumSearchPage, RandomBandPage +from enmet.entities import Band, Album, Track, ExternalEntity +from enmet.pages import BandSearchPage, AlbumSearchPage, RandomBandPage, SongSearchPage __all__ = ["search_albums", "search_bands", "random_band"] @@ -65,6 +65,34 @@ def search_albums(*, name: str = None, strict: bool = None, band: str = None, ba in AlbumSearchPage(params).albums] +_SONG_SEARCH_FIELDS_MAPPING = { + "name": "songTitle", + "strict": "exactSongMatch", + "band": "bandName", + "band_strict": "exactBandMatch", + "album": "releaseTitle", + "album_strict": "exactReleaseMatch", + "lyrics": "lyrics", + "genre": "genre", + "release_types": "releaseType[]" +} + + +def search_songs(*, name: str = None, strict: bool = None, band: str = None, band_strict: bool = None, + album: str = None, album_strict: bool = None, lyrics: str = None, genre: str = None, + release_types: List[ReleaseTypes] = None): + if not any(locals().values()): + return [] + params = {_SONG_SEARCH_FIELDS_MAPPING[k]: v or "" for k, v in locals().items()} + params[_SONG_SEARCH_FIELDS_MAPPING["release_types"]] = [_RELEASE_TYPE_IDS[rt] for rt in release_types or []] + return [Track(s[6], + s[5], + [Band(url_to_id(s[2])) if s[2] else ExternalEntity(s[3])], + album_id=url_to_id(s[0])) + for s + in SongSearchPage(params).songs] + + def random_band() -> Band: """Just get a random band.""" return Band(url_to_id(RandomBandPage().band)) diff --git a/test/test_enmet.py b/test/test_enmet.py index b0bcf00..fe85938 100644 --- a/test/test_enmet.py +++ b/test/test_enmet.py @@ -192,8 +192,8 @@ def test_album(): assert set(dir(album.lineup[0])) == {'active_bands', 'age', 'album', 'biography', 'gender', 'guest_session', 'links', 'misc_staff', 'name', 'name_on_album', 'past_bands', 'place_of_birth', 'real_full_name', 'role', 'trivia', 'last_modified'} - assert dir(album.discs[0]) == ['name', 'number', 'total_time', 'tracks'] - assert dir(album.discs[0].tracks[0]) == ['band', 'lyrics', 'name', 'number', 'time'] + assert set(dir(album.discs[0])) == {'name', 'number', 'total_time', 'tracks'} + assert set(dir(album.discs[0].tracks[0])) == {'album', 'band', 'lyrics', 'name', 'number', 'time'} assert "AlbumArtist" in repr(album.lineup[0]) assert str(album.lineup[0]) == "Udo Dirkschneider" assert len(album.other_versions) > 20 @@ -313,7 +313,7 @@ def test_datestr_to_date(datestr, year, month, day): def test_Track_no_band_for_track(): # given b1, b2 = SimpleNamespace(name="b1"), SimpleNamespace(name="b2") - t = Track("1", [b1, b2], 1, name="test123") + t = Track("1", "test123", [b1, b2], 1) # then with pytest.raises(ValueError): _ = t.band @@ -348,7 +348,7 @@ def select_one(self, _): def test_track_split_name_without_band(): # given b1, b2 = SimpleNamespace(name="b1"), SimpleNamespace(name="b2") - t = Track("123", [b1, b2], 1, "name1") + t = Track("123", "name1", [b1, b2], 1) # when name = t.name # then From fb216f01b7d71f627ce79578035331de671c6a5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz?= Date: Tue, 7 Nov 2023 20:26:37 +0100 Subject: [PATCH 2/6] Some code cleanups, doc updates --- README.md | 165 +++++++++++++++++++++++++----------------- src/enmet/common.py | 1 - src/enmet/entities.py | 14 ++-- 3 files changed, 104 insertions(+), 76 deletions(-) diff --git a/README.md b/README.md index 8a00deb..9d1e65e 100644 --- a/README.md +++ b/README.md @@ -85,13 +85,13 @@ megadeth = enmet.search_bands(name="Megadeth")[0] megadeth2 = enmet.Band("138") print(megadeth is megadeth2) ``` -- To optimise memory usage, only actually used objects are cached. Once an object is nowehere referenced in your code, it is removed from the cache. +- To optimise memory usage, only actually used objects are cached. Once an object is nowhere referenced in your code, it is removed from the cache. ## Reference manual Note: Any optional parameters in constructors that provide values related to an entity and which are not provided when creating the object, are resolved lazily later. -Note: Any "empty" values are returned as `None` or `[]`. This refers both to values nonexistent for a given entity and values with equvalen meaning (like "N/A", "Unknown" etc.). +Note: Any "empty" values are returned as `None` or `[]`. This refers both to values nonexistent for a given entity and values with equvalent meaning (like "N/A", "Unknown" etc.). ### Classes @@ -99,24 +99,24 @@ Note: Any "empty" values are returned as `None` or `[]`. This refers both to val - `__init__(self, id_: str, *, name: str = None, year: int = None)` `id_` is album identifier in Metal Archives. `name` is album name as appearing on the album's page. `year` is album release year. - Attributes and properties: - `id: str` - identifier - - `name(self) -> str` - - `bands(self) -> List[Band]` - - `type(self) -> ReleaseTypes` - - `year(self) -> int` - - `release_date(self) -> PartialDate` - - `label(self) -> str` - - `format(self) -> str` - - `reviews(self) -> Tuple[str, str]` - - `discs(self) -> List[Disc]` - - `lineup(self) -> List[AlbumArtist]` - - `total_time(self) -> timedelta` - - `guest_session_musicians(self) -> List["AlbumArtist"]` - - `other_staff(self) -> List["AlbumArtist"]` - - `additional_notes(self) -> str` - - `last_modified(self) -> datetime` (time of the last modification of the album's page) - - `other_versions(self) -> List["Album"]` + - `name -> str` + - `bands -> List[Band]` + - `type -> ReleaseTypes` + - `year -> int` + - `release_date -> PartialDate` + - `label -> str` + - `format -> str` + - `reviews -> Tuple[str, str]` + - `discs -> List[Disc]` + - `lineup -> List[AlbumArtist]` + - `total_time -> timedelta` + - `guest_session_musicians -> List["AlbumArtist"]` + - `other_staff -> List["AlbumArtist"]` + - `additional_notes -> str` + - `last_modified -> datetime` (time of the last modification of the album's page) + - `other_versions -> List["Album"]` - Methods: - - `def get_image(self) -> Tuple[str, str, bytes]` - album image: original file name, MIME type, binary data + - `get_image() -> Tuple[str, str, bytes]` - album image: original file name, MIME type, binary data - `AlbumArtist(_EntityArtist)`. This class represent an artist performing on a specific album. - `__init__(self, id_: str, album_id: str, *, name: str = None, role: str = None)`. `id_` is the artist's identifier in Metal Archives. `album_id` is an album's identifier. `name` is the artist's name as stated on the album. `role` is the artist's role on the album. - Attributes and properties: @@ -128,56 +128,56 @@ Note: Any "empty" values are returned as `None` or `[]`. This refers both to val - `__init__(self, id_)`. `id_` is artist identifier in Metal Archives. - Attributes and properties: - `id: str` - identifier - - `name(self) -> str` - - `real_full_name(self) -> str` - - `age(self) -> str` - - `place_of_birth(self) -> str` - - `gender(self) -> str` - - `biography(self) -> str` - - `trivia(self) -> str` - - `active_bands(self) -> Dict[Union[Band, ExternalEntity], List[Album]]` - - `past_bands(self) -> Dict[Union[Band, ExternalEntity], List[Album]]` - - `guest_session(self) -> Dict[Union[Band, ExternalEntity], List[Album]]` - - `misc_staff(self) -> Dict[Union[Band, ExternalEntity], List[Album]]` - - `links(self) -> List[Tuple[str, str]]` - - `last_modified(self) -> datetime` (time of the last modification of the artist's page) + - `name -> str` + - `real_full_name -> str` + - `age -> str` + - `place_of_birth -> str` + - `gender -> str` + - `biography -> str` + - `trivia -> str` + - `active_bands -> Dict[Union[Band, ExternalEntity], List[Album]]` + - `past_bands -> Dict[Union[Band, ExternalEntity], List[Album]]` + - `guest_session -> Dict[Union[Band, ExternalEntity], List[Album]]` + - `misc_staff -> Dict[Union[Band, ExternalEntity], List[Album]]` + - `links -> List[Tuple[str, str]]` + - `last_modified -> datetime` (time of the last modification of the artist's page) - Methods: - - `def get_image(self) -> Tuple[str, str, bytes]` - artist image: original file name, MIME type, binary data + - `get_image() -> Tuple[str, str, bytes]` - artist image: original file name, MIME type, binary data - `Band(EnmetEntity)`. This class represents a band. - `__init__(self, id_: str, *, name: str = None, country: Countries = None)`. `id_` is the band's identifier in Metal Archives. `name` is the band's name as stated on the band's page. `country` is the band's country of origin. - Attributes and properties: - `id: str` - identifier - - `name(self) -> str` - - `country(self) -> Countries` - - `location(self) -> str` - - `formed_in(self) -> int` - - `years_active(self) -> List[str]` - - `genres(self) -> List[str]` - - `lyrical_themes(self) -> List[str]` - - `label(self) -> str` (current or last known) - - `lineup(self) -> List["LineupArtist"]` (current or last known) - - `discography(self) -> List["Album"]` - - `similar_artists(self) -> List["SimilarBand"]` (Note: There is naming inconseqence here on Metal Archives page - this list refers to bands, not artists, ie. persons. Property name follows Metal Archives wording, but otherwise the notion of "band" is used.) - - `past_members(self) -> List["LineupArtist"]` - - `live_musicians(self) -> List["LineupArtist"]` - - `info(self) -> str` (free text information below header items) - - `last_modified(self) -> datetime` (date of the last band page modification) - - `status(self) -> Optional[BandStatuses]` - - `links_official(self) -> List[Tuple[str, str]]` (returns list or tuples- url, page name) - - `links_official_merchandise(self) -> List[Tuple[str, str]]` (returns list or tuples- url, page name) - - `links_unofficial(self) -> List[Tuple[str, str]]` (returns list or tuples- url, page name) - - `links_labels(self) -> List[Tuple[str, str]]` (returns list or tuples- url, page name) - - `links_tabulatures(self) -> List[Tuple[str, str]]` (returns list or tuples- url, page name) + - `name -> str` + - `country -> Countries` + - `location -> str` + - `formed_in -> int` + - `years_active -> List[str]` + - `genres -> List[str]` + - `lyrical_themes -> List[str]` + - `label -> str` (current or last known) + - `lineup -> List["LineupArtist"]` (current or last known) + - `discography -> List["Album"]` + - `similar_artists -> List["SimilarBand"]` (Note: There is naming inconseqence here on Metal Archives page - this list refers to bands, not artists, ie. persons. Property name follows Metal Archives wording, but otherwise the notion of "band" is used.) + - `past_members -> List["LineupArtist"]` + - `live_musicians -> List["LineupArtist"]` + - `info -> str` (free text information below header items) + - `last_modified -> datetime` (date of the last band page modification) + - `status -> Optional[BandStatuses]` + - `links_official -> List[Tuple[str, str]]` (returns list or tuples- url, page name) + - `links_official_merchandise -> List[Tuple[str, str]]` (returns list or tuples- url, page name) + - `links_unofficial -> List[Tuple[str, str]]` (returns list or tuples- url, page name) + - `links_labels -> List[Tuple[str, str]]` (returns list or tuples- url, page name) + - `links_tabulatures -> List[Tuple[str, str]]` (returns list or tuples- url, page name) - Methods: - - `def get_band_image(self) -> Tuple[str, str, bytes]` - band image: original file name, MIME type, binary data - - `def get_logo_image(self) -> Tuple[str, str, bytes]` - logo image: original file name, MIME type, binary data + - `get_band_image() -> Tuple[str, str, bytes]` - band image: original file name, MIME type, binary data + - `get_logo_image() -> Tuple[str, str, bytes]` - logo image: original file name, MIME type, binary data - `Disc(DynamicEnmetEntity)`. This class represents a disc of an album. More precisely, it is a container which holds some or all tracks of the album. Except for a CD, it can be in fact a physical cassette, VHS, DVD or even arbitrary partition in case of electronic releases - whatever Metal Archives considers a "disc". - `__init__(self, album_id: str, number: int = 0, bands: List[Band] = None)`. `album_id` is id of an album the disc belongs to. `number` is ordinal number of the disc on the album (counted from 0). `bands` is a list of bands that perform tracks on the disc. - Attributes and properties: - - `number(self) ->int` (disc number on the album counted from 1) - - `name(self) -> Optional[str]` (disc name or None if the disc has no specific name) - - `total_time(self) -> timedelta` - - `tracks(self) -> List["Track"]` + - `number ->int` (disc number on the album counted from 1) + - `name -> Optional[str]` (disc name or None if the disc has no specific name) + - `total_time -> timedelta` + - `tracks -> List["Track"]` - `ExternalEntity(Entity)`. This class represents entity external to Metal Archives, for example band or artist which appear on metal albums, but is not represented in Metal Archives itself. - `__init__(self, name: str):` `name` is data to store for the entity. - Attributes and properties: @@ -196,14 +196,29 @@ Note: Any "empty" values are returned as `None` or `[]`. This refers both to val - `similar_to: Band` - the band given band is similar to - `score: int` - similarity score. - all remaining attributes and properties are identical as for `Band`. -- `Track(EnmetEntity)`. This class represents a track on an album. It's a bit different than the other EnmetEntity classes, as tracks don't have their own resources (pages) in Metal Archives. - - `__init__(self, id_, bands: List[Band], number: int = None, name: str = None, time: timedelta = None, lyrics_info: Optional[bool] = None)`. `id_` a track's identifier. `bands` is a list of bands performing on the `Disc` which the track belongs to. `number` is the track's number on the disc (counter from 1). `name` is the track's name. `time` is the track's duration. `lyrics_info` is lyrics availability status (`None` if there is no information, `True` if a link to the lyrics is available, `False` it the track is marked as _instrumental_). +- `Track(EnmetEntity)`. This class represents a track on an album. It's a bit different than the other EnmetEntity + classes, as tracks don't have their own resources (pages) in The Metal Archives. + - `__init__(self, id_: str, name: str, bands: List[Band], number: int = None, time: timedelta = None, lyrics_info: + bool = ..., album_id: str = None):`. `id_` is the track's identifier (actually it's more like lyrics identifier). + `name` is the track's name. `bands` is a list of bands + performing on the `Disc` which the track belongs to. In case of fe. split releases, band is part of the track's + name + displayed in the MA site. `number` is + the track's number on the disc (counter from 1). `time` is the track's duration. `lyrics_info` is lyrics + availability status (`None` if there is no information about lyrics in The MA, `True` if a link to the lyrics is + available, + `False` it + the track is marked as _instrumental_, `...` if this information is missing when object is created). `album_id` + is an identifier of the album the track belongs to. - Attributes and properties: + - `id: str` (it is more like lyrics identifier) - `number: int` (the track's number on a disc counted from 1) - `time: timedelta` (the track's duration) - - `name(self) -> str` - - `band(self) -> Band` - - `lyrics(self) -> Optional[Union[bool, str]]` (lyrics; `False` if the track is marked as instrumental, `None` if there is no track informaction, lyrics text otherwise) + - `name -> str` + - `band -> Band` + - `lyrics -> Optional[Union[bool, str]]` (lyrics: `False` if the track is marked as instrumental, `None` + if there is no track information, lyrics text otherwise) + - `album -> Album` ### Functions @@ -219,7 +234,10 @@ Note: Any "empty" values are returned as `None` or `[]`. This refers both to val - `genre` - genre name (substring matching) - `countries` - list of Countries enum members - `formed_from` and `formed_to` - year range for band formation -- `search_albums(*, name: str = None, strict: bool = None, band: str = None, band_strict: bool = None, year_from: int = None, month_from: int = None, year_to: int = None, month_to: int = None, genre: str = None, release_types: List[ReleaseTypes] = None)`. This function searches for albums, returning a list of `Album` objects. Parameters: +- `search_albums(*, name: str = None, strict: bool = None, band: str = None, band_strict: bool = None, year_from: + int = None, month_from: int = None, year_to: int = None, month_to: int = None, genre: str = None, release_types: + List[ReleaseTypes] = None) -> List[Album]`. This function searches for albums, returning a list of `Album` objects. + Parameters: - `name` - album name - `strict` - force strict matching for `name` (case-insensitive) - `band` - name of a band performing the album @@ -227,6 +245,21 @@ Note: Any "empty" values are returned as `None` or `[]`. This refers both to val - `year_from`, `month_from`, `year_to`, `month_to` - time range for album release date - `genre` - genre name (substring matching) - `release_types` - list of ReleaseType enum members +- `search_songs(*, name: str = None, strict: bool = None, band: str = None, band_strict: bool = None, album: str = + None, album_strict: bool = None, lyrics: str = None, genre: str = None, release_types: List[ReleaseTypes] = None) + -> List[Track]`. This function searches for tracks, returning a list of `Track` objects. Parameters: + - `name` - track name + - `strict` - force strict matching for `name` (case-insensitive) + - `band` - name of a band performing the track + - `band_strict` - force strict matching for `band_name` (case-insensitive) + - `album` - name of an album the track appears on + - `album_strict` - force strict matching for `album` (case-insensitive) + - `lyrics` - substring matching for the track's lyrics. If multiple words are provided, they are joined + using + AND operator (so all the words must appear in the lyrics to satisfy the search). To search for an exact phrase, + you need to enclose it in double quotes, fe. `lyrics='"My Valkyrie"'`. + - `genre` - substring matching for genres of bands performing the track + - `release_types` - release types to consider during searching - `random_band() -> Band` - get a random band from The Metal Archives. This function is used mainly for testing. ### Enums diff --git a/src/enmet/common.py b/src/enmet/common.py index 489c21d..93fdfae 100644 --- a/src/enmet/common.py +++ b/src/enmet/common.py @@ -81,7 +81,6 @@ def __new__(cls, *args, **kwargs): @abstractmethod def hash(cls: Type, *args, **kwargs) -> int: """Pseudo-hash to use in __new__.""" - return NotImplementedError class ReleaseTypes(Enum): diff --git a/src/enmet/entities.py b/src/enmet/entities.py index ad54488..a133f18 100644 --- a/src/enmet/entities.py +++ b/src/enmet/entities.py @@ -401,7 +401,6 @@ def __init__(self, id_: str, name: str, bands: List[Band], number: int = None, def __dir__(self): return ["name", "number", "time", "band", "lyrics", "album"] - @cache def _parse_album_page(self) -> None: track = [tr for disc in AlbumPage(self._album_id).tracks for tr in disc if tr[2].endswith(self.name)][0] self._number = int(track[1]) @@ -449,20 +448,17 @@ def lyrics(self) -> Optional[Union[bool, str]]: return None # No information case False: # Instrumental return False + case True: + return LyricsPage(self.id).lyrics case _: - info = LyricsPage(self.id).lyrics - match info: - case "(lyrics not available)": - return None - case "(Instrumental)": - return False - case _: - return info + self._parse_album_page() + return self.lyrics @cached_property def album(self) -> Album: return Album(self._album_id) + class Artist(EnmetEntity): """General artist info""" From c32b060dc94f1a31fa4cb73d3f964acd583d0f5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz?= Date: Sat, 11 Nov 2023 20:54:24 +0100 Subject: [PATCH 3/6] Fix Album.reviews and Artist.age for empty values, fix Album.additional_notes for empty value, cleanup --- README.md | 28 ++++++---------------------- src/enmet/entities.py | 7 ++++--- src/enmet/pages.py | 10 +++++++--- src/enmet/search.py | 10 +++++----- 4 files changed, 22 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 9d1e65e..b0ba286 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,8 @@ print(megadeth is megadeth2) Note: Any optional parameters in constructors that provide values related to an entity and which are not provided when creating the object, are resolved lazily later. -Note: Any "empty" values are returned as `None` or `[]`. This refers both to values nonexistent for a given entity and values with equvalent meaning (like "N/A", "Unknown" etc.). +Note: Any "empty" values are returned as `None` or `[]`. This refers both to values nonexistent for a given entity +and values with equivalent meaning (like "N/A", "Unknown" etc.). ### Classes @@ -198,26 +199,14 @@ Note: Any "empty" values are returned as `None` or `[]`. This refers both to val - all remaining attributes and properties are identical as for `Band`. - `Track(EnmetEntity)`. This class represents a track on an album. It's a bit different than the other EnmetEntity classes, as tracks don't have their own resources (pages) in The Metal Archives. - - `__init__(self, id_: str, name: str, bands: List[Band], number: int = None, time: timedelta = None, lyrics_info: - bool = ..., album_id: str = None):`. `id_` is the track's identifier (actually it's more like lyrics identifier). - `name` is the track's name. `bands` is a list of bands - performing on the `Disc` which the track belongs to. In case of fe. split releases, band is part of the track's - name - displayed in the MA site. `number` is - the track's number on the disc (counter from 1). `time` is the track's duration. `lyrics_info` is lyrics - availability status (`None` if there is no information about lyrics in The MA, `True` if a link to the lyrics is - available, - `False` it - the track is marked as _instrumental_, `...` if this information is missing when object is created). `album_id` - is an identifier of the album the track belongs to. + - `__init__(self, id_: str, name: str, bands: List[Band], number: int = None, time: timedelta = None, lyrics_info: bool = ..., album_id: str = None):`. `id_` is the track's identifier (actually it's more like lyrics identifier). `name` is the track's name. `bands` is a list of bands performing on the `Disc` which the track belongs to. In case of fe. split releases, band is part of the track's name displayed in the MA site. `number` is the track's number on the disc (counter from 1). `time` is the track's duration. `lyrics_info` is lyrics availability status (`None` if there is no information about lyrics in The MA, `True` if a link to the lyrics is available, `False` it the track is marked as _instrumental_, `...` if this information is missing when object is created). `album_id` is an identifier of the album the track belongs to. - Attributes and properties: - `id: str` (it is more like lyrics identifier) - `number: int` (the track's number on a disc counted from 1) - `time: timedelta` (the track's duration) - `name -> str` - `band -> Band` - - `lyrics -> Optional[Union[bool, str]]` (lyrics: `False` if the track is marked as instrumental, `None` - if there is no track information, lyrics text otherwise) + - `lyrics -> Optional[Union[bool, str]]` (lyrics: `False` if the track is marked as instrumental, `None` if there is no track information, lyrics text otherwise) - `album -> Album` @@ -245,19 +234,14 @@ Note: Any "empty" values are returned as `None` or `[]`. This refers both to val - `year_from`, `month_from`, `year_to`, `month_to` - time range for album release date - `genre` - genre name (substring matching) - `release_types` - list of ReleaseType enum members -- `search_songs(*, name: str = None, strict: bool = None, band: str = None, band_strict: bool = None, album: str = - None, album_strict: bool = None, lyrics: str = None, genre: str = None, release_types: List[ReleaseTypes] = None) - -> List[Track]`. This function searches for tracks, returning a list of `Track` objects. Parameters: +- `search_songs(*, name: str = None, strict: bool = None, band: str = None, band_strict: bool = None, album: str = None, album_strict: bool = None, lyrics: str = None, genre: str = None, release_types: List[ReleaseTypes] = None) -> List[Track]`. This function searches for tracks, returning a list of `Track` objects. Parameters: - `name` - track name - `strict` - force strict matching for `name` (case-insensitive) - `band` - name of a band performing the track - `band_strict` - force strict matching for `band_name` (case-insensitive) - `album` - name of an album the track appears on - `album_strict` - force strict matching for `album` (case-insensitive) - - `lyrics` - substring matching for the track's lyrics. If multiple words are provided, they are joined - using - AND operator (so all the words must appear in the lyrics to satisfy the search). To search for an exact phrase, - you need to enclose it in double quotes, fe. `lyrics='"My Valkyrie"'`. + - `lyrics` - substring matching for the track's lyrics. If multiple words are provided, they are joined using AND operator (so all the words must appear in the lyrics to satisfy the search). To search for an exact phrase, you need to enclose it in double quotes, fe. `lyrics='"My Valkyrie"'`. - `genre` - substring matching for genres of bands performing the track - `release_types` - release types to consider during searching - `random_band() -> Band` - get a random band from The Metal Archives. This function is used mainly for testing. diff --git a/src/enmet/entities.py b/src/enmet/entities.py index a133f18..e6099be 100644 --- a/src/enmet/entities.py +++ b/src/enmet/entities.py @@ -308,7 +308,8 @@ def format(self) -> str: @cached_property def reviews(self) -> Tuple[str, str]: - return self._album_page.reviews + data = self._album_page.reviews + return None if data[0] is None else data @cached_property def catalog_id(self) -> str: @@ -339,7 +340,7 @@ def total_time(self) -> Optional[timedelta]: reduce(timedelta.__add__, [disc.total_time for disc in self.discs if disc.total_time], timedelta())) @cached_property - def additional_notes(self) -> str: + def additional_notes(self) -> Optional[str]: return self._album_page.additional_notes @cached_property @@ -483,7 +484,7 @@ def real_full_name(self) -> str: @cached_property def age(self) -> str: - return self._artist_page.age + return _turn_na_into_none(self._artist_page.age) @cached_property def place_of_birth(self) -> str: diff --git a/src/enmet/pages.py b/src/enmet/pages.py index ab591df..937e04a 100644 --- a/src/enmet/pages.py +++ b/src/enmet/pages.py @@ -13,7 +13,7 @@ from requests import get from requests_cache import CachedSession -from enmet.common import CachedInstance +from .common import CachedInstance __all__ = ["set_session_cache"] @@ -466,8 +466,12 @@ def other_staff(self) -> List[List[str]]: return self._get_people("#album_members_misc") @cached_property - def additional_notes(self) -> str: - return self.enmet.select_one("#album_tabs_notes").text.strip() + def additional_notes(self) -> Optional[str]: + item = self.enmet.select_one("#album_tabs_notes") + if item: + return item.text.strip() + else: + return None @cached_property def last_modified(self) -> str: diff --git a/src/enmet/search.py b/src/enmet/search.py index 09c83c4..c4013b9 100644 --- a/src/enmet/search.py +++ b/src/enmet/search.py @@ -1,11 +1,11 @@ from typing import List -from enmet import Countries, country_to_enum_name -from enmet.common import ReleaseTypes, url_to_id, datestr_to_date -from enmet.entities import Band, Album, Track, ExternalEntity -from enmet.pages import BandSearchPage, AlbumSearchPage, RandomBandPage, SongSearchPage +from .countries import Countries, country_to_enum_name +from .common import ReleaseTypes, url_to_id, datestr_to_date +from .entities import Band, Album, Track, ExternalEntity +from .pages import BandSearchPage, AlbumSearchPage, RandomBandPage, SongSearchPage -__all__ = ["search_albums", "search_bands", "random_band"] +__all__ = ["search_albums", "search_bands", "search_songs", "random_band"] _RELEASE_TYPE_IDS = {ReleaseTypes.FULL: 1, ReleaseTypes.LIVE: 2, ReleaseTypes.DEMO: 3, ReleaseTypes.SINGLE: 4, ReleaseTypes.EP: 5, ReleaseTypes.VIDEO: 6, ReleaseTypes.BOX: 7, ReleaseTypes.SPLIT: 8, From 43fbebdb9fb2eefedbc0b7c81313f5aff42e3e4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz?= Date: Sat, 6 Jan 2024 23:21:20 +0100 Subject: [PATCH 4/6] Update unit tests --- test/test_enmet.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_enmet.py b/test/test_enmet.py index fe85938..a46844e 100644 --- a/test/test_enmet.py +++ b/test/test_enmet.py @@ -32,7 +32,7 @@ def test_band(): assert band.genres == ["Thrash Metal (early/later)", "Heavy Metal/Rock (mid)"] assert set(band.lyrical_themes) == set(["Politics", "Love", "Addiction", "History", "Death", "Religion", "Society", "New World Order"]) assert band.label == "Tradecraft" - assert {a.id for a in band.lineup} == {"184", "2836", "3826", "1391"} + assert {a.id for a in band.lineup} == {"184", "2836", "1900", "1391"} assert repr(band.lineup[0]) == "" assert str(band.lineup[0]) == "Dave Mustaine" assert Artist("184") is band.lineup[0].artist @@ -59,7 +59,7 @@ def test_band(): 'similar_to', 'status', 'years_active', 'links_labels', 'links_official', 'links_official_merchandise', 'links_tabulatures', 'links_unofficial', } - assert band.info.startswith("Pictured from left to right") + assert band.info.startswith("Contact: webmaster@megadeth.com") assert band.last_modified >= datetime(2022, 10, 10, 15, 58, 54) assert len(band.links_tabulatures) > 5 assert len(band.links_unofficial) > 3 @@ -337,12 +337,12 @@ class Dummy: text = "text123" def select_one(self, _): - return None + return {"href": "abc"} mocker.patch("enmet.pages._DataPage._get_header_item", lambda p1, p2: Dummy()) # when b = Album("dummy").reviews # then - assert b == (None, "text123") + assert b == ("abc", "text123") def test_track_split_name_without_band(): From c909ec1b4a131a1ca5dcf925c71b86d2cc875d36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz?= Date: Fri, 5 Jul 2024 22:38:48 +0200 Subject: [PATCH 5/6] Update unit tests --- src/enmet/entities.py | 1 + test/test_enmet.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/enmet/entities.py b/src/enmet/entities.py index e6099be..54a1303 100644 --- a/src/enmet/entities.py +++ b/src/enmet/entities.py @@ -122,6 +122,7 @@ class DynamicEnmetEntity(Entity, ABC): def hash(cls, *args, **kwargs) -> int: return hash((cls, args[0], args[1])) + class Band(EnmetEntity): """Band or artist performing as a band.""" def __init__(self, id_: str, *, name: str = None, country: Countries = None, genres: str = None): diff --git a/test/test_enmet.py b/test/test_enmet.py index a46844e..7b51144 100644 --- a/test/test_enmet.py +++ b/test/test_enmet.py @@ -110,7 +110,7 @@ def test_artist(): 'past_bands', 'place_of_birth', 'real_full_name', 'trivia', 'last_modified'} assert list(a.active_bands.keys()) == [Band("138")] assert set(a.past_bands) == {Band("3540464105"), Band("4984"), Band("125"), Band("3540461857"), - ExternalEntity("Fallen Angels", role="Vocals, Guitars (1983)"), ExternalEntity("Panic", role="Guitars (?-1981)")} + ExternalEntity("Fallen Angels", role="Vocals, Guitars (1983)"), ExternalEntity("Panic", role="Guitars (1978-1981)")} assert set(a.guest_session) == {Band("401"), Band("37"), Band("706"), Band("343"), Band("59")} assert set(a.misc_staff) == {Band("138"), Band("4984"), Band("125"), Band("3540461857"), Band("401"), Band("343"), Band("1831")} assert len(a.links) == 10 From 9ba538ec1833c8957a5af2f42a705641c454f9f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz?= Date: Fri, 5 Jul 2024 22:45:13 +0200 Subject: [PATCH 6/6] Version bump --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 772f84f..bdca465 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "enmet" -version = "0.9.0b" +version = "0.9.0" description = "Python API for Encyclopaedia Metallum (The Metal Archives) website." readme = {text = """ Enmet is a programmatic API to Encyclopaedia Metallum - The Metal Archives site. It allows convenient access to Metal Archives data from python code.