Skip to content

Commit

Permalink
Merge pull request #7 from lukjak/0.7.0
Browse files Browse the repository at this point in the history
0.7.0
  • Loading branch information
lukjak authored Feb 26, 2023
2 parents 34d485e + 281f065 commit 601852a
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 46 deletions.
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ The cache by default is located in `%LOCALAPPDATA%\.enmet` or `~/.enmet` directo

In order to control caching, you can both obtain the default cache object (for example to clean up old entries) and set your own cache. If you use your own cache, you need to set it each time you use _Enmet_, as there is no persistent configuration for it. The function to manipulate the cache is [`set_session_cache`](#functions).

Web requests fetching images are not cached.

There is no feature to disable session caching.

### Object caching
Expand Down Expand Up @@ -113,6 +115,8 @@ Note: Any "empty" values are returned as `None` or `[]`. This refers both to val
- `additional_notes(self) -> str`
- `last_modified(self) -> datetime` (time of the last modification of the album's page)
- `other_versions(self) -> List["Album"]`
- Methods:
- `def get_image(self) -> Tuple[str, str, bytes]` - album image: original file name, MIME type, binary data
- `AlbumArtist(_EntityArtist)`. This class represents an artist performing on a specific album.
- `__init__(self, id_: str, album_id: str, *, name: str = None, role: str = None)`. `id_` is the artist's identifier in Metal Archives. `album_id` is an album's identifier. `name` is the artist's name as stated on the album. `role` is the artist's role on the album.
- Attributes and properties:
Expand All @@ -137,6 +141,8 @@ Note: Any "empty" values are returned as `None` or `[]`. This refers both to val
- `misc_staff(self) -> Dict[Union[Band, ExternalEntity], List[Album]]`
- `links(self) -> List[Tuple[str, str]]`
- `last_modified(self) -> datetime` (time of the last modification of the artist's page)
- Methods:
- `def get_image(self) -> Tuple[str, str, bytes]` - artist image: original file name, MIME type, binary data
- `Band(EnmetEntity)`. This class represents a band.
- `__init__(self, id_: str, *, name: str = None, country: Countries = None)`. `id_` is the band's identifier in Metal Archives. `name` is the band's name as stated on the band's page. `country` is the band's country of origin.
- Attributes and properties:
Expand All @@ -162,6 +168,9 @@ Note: Any "empty" values are returned as `None` or `[]`. This refers both to val
- `links_unofficial(self) -> List[Tuple[str, str]]` (returns a list of tuples - url, page name)
- `links_labels(self) -> List[Tuple[str, str]]` (returns a list of tuples - url, page name)
- `links_tabulatures(self) -> List[Tuple[str, str]]` (returns a list of tuples - url, page name)
- Methods:
- `def get_band_image(self) -> Tuple[str, str, bytes]` - band image: original file name, MIME type, binary data
- `def get_logo_image(self) -> Tuple[str, str, bytes]` - logo image: original file name, MIME type, binary data
- `Disc(DynamicEnmetEntity)`. This class represents a disc of an album. More precisely, it is a container which holds some or all tracks of the album. Besides a CD, it can in fact be a physical cassette, VHS, DVD or even an arbitrary partition in the case of electronic releases - whatever Metal Archives considers a "disc".
- `__init__(self, album_id: str, number: int = 0, bands: List[Band] = None)`. `album_id` is the id of the album the disc belongs to. `number` is the ordinal number of the disc on the album (counted from 0). `bands` is a list of bands that perform tracks on the disc.
- Attributes and properties:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "enmet"
version = "0.6.0"
version = "0.7.0"
description = "Python API for Encyclopaedia Metallum (The Metal Archives) website."
readme = {text = """
Enmet is a programmatic API to Encyclopaedia Metallum - The Metal Archives site. It allows convenient access to Metal Archives data from python code.
Expand Down
15 changes: 11 additions & 4 deletions src/enmet/common.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import logging
from abc import abstractmethod, ABC
from enum import Enum
from pathlib import PurePath
from typing import Tuple, Type
from urllib.parse import urlparse
from weakref import WeakValueDictionary

Expand Down Expand Up @@ -60,21 +62,26 @@ def datestr_to_date(date_string: str) -> PartialDate:
return PartialDate(year=int(year[0]))


class CachedInstance:
class CachedInstance(ABC):
"""Mixin to reuse existing objects."""
_CACHE = WeakValueDictionary()

def __new__(cls, *args, **kwargs):
hash_ = cls.hash(*args, **kwargs)
if obj := CachedInstance._CACHE.get((cls.__name__, hash_)):
hash_ = cls.hash(cls, *args, **kwargs)
if obj := CachedInstance._CACHE.get(hash_):
_logger.debug(f"cached get {cls.__name__} {hash_}")
return obj
else:
_logger.debug(f"uncached get {cls.__name__} {hash_}")
obj = super().__new__(cls)
CachedInstance._CACHE[(cls.__name__, hash_)] = obj
CachedInstance._CACHE[hash_] = obj
return obj

@staticmethod
@abstractmethod
def hash(cls: Type, *args, **kwargs) -> int:
"""Pseudo-hash to use in __new__."""


class ReleaseTypes(Enum):
"""Names for release types."""
Expand Down
85 changes: 59 additions & 26 deletions src/enmet/entities.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import re
from abc import ABC, abstractmethod
from abc import ABC
from datetime import datetime, timedelta
from functools import cached_property, reduce
from inspect import getmembers
from itertools import chain
from urllib.parse import urlparse

import requests
from typing import List, Iterable, Optional, Tuple, Union, Dict

from .common import CachedInstance, ReleaseTypes, url_to_id, datestr_to_date, PartialDate, BandStatuses
Expand Down Expand Up @@ -50,25 +54,31 @@ def _turn_na_into_none(data: Union[str, List, timedelta]) -> Union[List, None, s
return data


class Entity(ABC, CachedInstance):
def _get_image(url: str) -> Tuple[str, str, bytes]:
"""Returns image file name, mime type/subtype and bytes"""
response = requests.get(url)
type = response.headers["Content-Type"]
name = urlparse(response.url).path.split("/")[-1]
data = response.content
return name, type, data


class Entity(CachedInstance, ABC):
"""A thing, like band or album"""
def __repr__(self):
return f"<{self.__class__.__name__}: {self.name}>"

def __dir__(self) -> List[str]:
return [p[0] for p in getmembers(self.__class__) if type(p[1]) is cached_property]

@staticmethod
@abstractmethod
def hash(*args, **kwargs) -> Tuple:
"""Pseudo-hash for use in CachedInstance.__new__ to determine whether to use cache."""
def __eq__(self, other):
return hash(self) == hash(other)


class ExternalEntity(Entity):
"""
Non EM entity, like non-metal musician in metal album lineup.
It has only string representation and is a class just for the
sake of consistency.
Construction requires some string ("name" - actual object value) + accepts any extra attributes.
"""
def __init__(self, name: str, **kwargs):
if not hasattr(self, "name"):
Expand All @@ -79,15 +89,13 @@ def __init__(self, name: str, **kwargs):
def __dir__(self) -> Iterable[str]:
return vars(self)

def __eq__(self, other):
return hash(self) == hash(other)

def __hash__(self):
return hash(tuple(vars(self).values()))
# There is a potential issue here if attributes are added to instance after initialization.
return self.hash(self.__class__, vars(self).values())

@staticmethod
def hash(*args, **kwargs) -> Tuple:
return tuple(sorted(args) + sorted(kwargs.values()))
def hash(cls, *args, **kwargs) -> int:
return hash((cls, tuple(sorted(str(val) for val in chain(args, kwargs.values())))))


class EnmetEntity(Entity, ABC):
Expand All @@ -99,9 +107,13 @@ def __init__(self, id_):
def __repr__(self):
return f"<{self.__class__.__name__}: {self.name} ({self.id})>"

def __hash__(self):
return self.hash(self.__class__, self.id)

@staticmethod
def hash(*args, **kwargs) -> Tuple:
return args[0],
def hash(cls, *args, **kwargs) -> int:
# Assuming entities of different types cannot have the same id - ???
return hash((cls, args[0]))


class DynamicEnmetEntity(Entity, ABC):
Expand Down Expand Up @@ -219,9 +231,15 @@ def links_labels(self) -> List[Tuple[str, str]]:
def links_tabulatures(self) -> List[Tuple[str, str]]:
return self._links_page.links_tabulatures

def get_logo_image(self) -> Tuple[str, str, bytes]:
return _get_image(self._band_page.logo_image_link)

def get_band_image(self) -> Tuple[str, str, bytes]:
return _get_image(self._band_page.band_image_link)


class SimilarBand(DynamicEnmetEntity):
def __init__(self, id_: str, similar_to_id: str, score: str, name: str = None, country: str = None,
def __init__(self, id_: str, similar_to_id: str, /, score: str, name: str = None, country: str = None,
genres: str = None):
if not "band" in self.__dict__:
self.band = Band(id_, name=name, country=country, genres=genres)
Expand All @@ -237,13 +255,16 @@ def __getattr__(self, item):
def __repr__(self):
return f"<{self.__class__.__name__}: {self.band.name} ({self.score})>"

def __hash__(self):
return self.hash(self.__class__, self.band.id, self.similar_to.id)

@staticmethod
def hash(*args, **kwargs) -> Tuple:
return args[0], args[1]
def hash(cls, *args, **kwargs) -> int:
return hash((cls, args[0], args[1]))


class Album(EnmetEntity):
def __init__(self, id_: str, *, name: str = None, year: int = None):
def __init__(self, id_: str, /, *, name: str = None, year: int = None):
# Have parameters for str and repr ready
if not hasattr(self, "id"):
super().__init__(id_)
Expand Down Expand Up @@ -333,9 +354,12 @@ def other_versions(self) -> List["Album"]:
data = AlbumVersionsPage(self.id).other_versions
return [Album(url_to_id(item[0])) for item in data]

def get_image(self) -> Tuple[str, str, bytes]:
return _get_image(self._album_page.image_link)


class Disc(DynamicEnmetEntity):
def __init__(self, album_id: str, number: int = 0, bands: List[Band] = None):
def __init__(self, album_id: str, number: int = 0, /, bands: List[Band] = None):
if not hasattr(self, "_number"):
self._number = number
self._album_page = AlbumPage(album_id)
Expand All @@ -360,9 +384,12 @@ def tracks(self) -> List["Track"]:
tracks.append(Track(t[0], self._bands, int(t[1]), t[2], _timestr_to_time(t[3]), t[4]))
return tracks

def __hash__(self):
return self.hash(self.__class__, self._album_page.id, self._number)

@staticmethod
def hash(*args, **kwargs) -> Tuple:
return args[0], args[1]
def hash(cls, *args, **kwargs) -> int:
return hash((cls, args[0], args[1]))


class Track(EnmetEntity):
Expand Down Expand Up @@ -492,11 +519,14 @@ def last_modified(self) -> datetime:
data = self._artist_page.last_modified
return _timestamp_to_time(data)

def get_image(self) -> Tuple[str, str, bytes]:
return _get_image(self._artist_page.image_link)


class EntityArtist(DynamicEnmetEntity, ABC):
""""Album artist or lineup artist"""

def __init__(self, id_, role: str = None):
def __init__(self, id_, role: str = None, /):
if not "artist" in self.__dict__:
self.artist = Artist(id_)
self.role = role
Expand All @@ -507,9 +537,12 @@ def __getattr__(self, item):
def __dir__(self) -> List[str]:
return dir(self.artist) + ["role"]

def __hash__(self):
return self.hash(self.__class__, self.artist.id, self.role)

@staticmethod
def hash(*args, **kwargs) -> Tuple:
return args[0], args[1]
def hash(cls, *args, **kwargs) -> int:
return hash((cls, args[0], args[1]))


class LineupArtist(EntityArtist):
Expand Down
54 changes: 41 additions & 13 deletions src/enmet/pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ def __get__(self, instance, owner) -> Union[BeautifulSoup, "_CachedSite"]:


class _DataPage(_Page, CachedInstance, ABC):
"""Abstract page of data (response to a data request)"""

enmet = _CachedSite()

Expand All @@ -149,9 +150,15 @@ def _get_header_item(self, name: str) -> Optional[Tag]:
def set_session_cache(**kwargs) -> CachedSession:
return _DataPage.enmet.set_session(**kwargs)

def __eq__(self, other):
return hash(self) == hash(other)

def __hash__(self):
return self.hash(self.__class__, self.id)

@staticmethod
def hash(*args, **kwargs) -> Tuple:
return args[0],
def hash(cls, *args, **kwargs) -> int:
return hash((cls, args[0]))


class DiscographyPage(_DataPage):
Expand Down Expand Up @@ -208,7 +215,7 @@ def genres(self) -> List[str]:

@cached_property
def lyrical_themes(self) -> List[str]:
return _split_by_sep(self._get_header_item("Lyrical themes:").text.strip())
return _split_by_sep(self._get_header_item("Themes:").text.strip())

@cached_property
def current_label(self):
Expand Down Expand Up @@ -254,6 +261,14 @@ def info(self) -> str:
def last_modified(self) -> str:
return self.enmet.find("td", string=re.compile("Last modified on")).text

@cached_property
def logo_image_link(self) -> Optional[str]:
return (link := self.enmet.select(".band_name_img img")) and link[0]["src"]

@cached_property
def band_image_link(self) -> Optional[str]:
return (link := self.enmet.select(".band_img img")) and link[0]["src"]


class _BandInfoPage(_DataPage):
RESOURCE = "band/read-more/id/{}"
Expand All @@ -267,31 +282,36 @@ class BandLinksPage(_DataPage):
RESOURCE = "link/ajax-list/type/band/id/{}"

def _get_links(self, kind: str) -> List[Tuple[str, str]]:
data = self.enmet.select_one(f"#{kind}")
if data is None:
return []
else:
return [(item["href"], item.text) for item in data.select("a")]
result = []
data = self.enmet.select(f"#{kind} ~ tr")
if data is not None:
for row in data:
if row["id"].startswith("header_"):
break
else:
cell = row.select_one("a")
result.append((cell["href"], cell.text))
return result

@cached_property
def links_official(self) -> List[Tuple[str, str]]:
return self._get_links("band_links_Official")
return self._get_links("header_Official")

@cached_property
def links_official_merchandise(self) -> List[Tuple[str, str]]:
return self._get_links("band_links_Official_merchandise")
return self._get_links("header_Official_merchandise")

@cached_property
def links_unofficial(self) -> List[Tuple[str, str]]:
return self._get_links("band_links_Unofficial")
return self._get_links("header_Unofficial")

@cached_property
def links_labels(self) -> List[Tuple[str, str]]:
return self._get_links("band_links_Labels")
return self._get_links("header_Labels")

@cached_property
def links_tabulatures(self) -> List[Tuple[str, str]]:
return self._get_links("band_links_Tablatures")
return self._get_links("header_Tablatures")


class BandRecommendationsPage(_DataPage):
Expand Down Expand Up @@ -420,6 +440,10 @@ def additional_notes(self) -> str:
def last_modified(self) -> str:
return self.enmet.find("td", string=re.compile("Last modified on")).text

@cached_property
def image_link(self) -> Optional[str]:
return (link := self.enmet.select(".album_img img")) and link[0]["src"]


class AlbumVersionsPage(_DataPage):
RESOURCE = "release/ajax-versions/current/{}/parent/{}"
Expand Down Expand Up @@ -544,6 +568,10 @@ def misc_staff(self) -> Dict[Tuple[str, ...], List[Tuple[str, ...]]]:
def last_modified(self) -> str:
return self.enmet.find("td", string=re.compile("Last modified on")).text

@cached_property
def image_link(self) -> Optional[str]:
return (link := self.enmet.select(".member_img img")) and link[0]["src"]



class _ArtistBiographyPage(_DataPage):
Expand Down
Loading

0 comments on commit 601852a

Please sign in to comment.