Skip to content

Commit

Permalink
Improve (local) metadata retrieval (#1577)
Browse files Browse the repository at this point in the history
  • Loading branch information
marcelveldt authored Aug 19, 2024
1 parent af95611 commit d7b47eb
Show file tree
Hide file tree
Showing 12 changed files with 269 additions and 195 deletions.
4 changes: 2 additions & 2 deletions music_assistant/common/models/media_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ class MediaItemImage(DataClassDictMixin):

def __hash__(self) -> int:
"""Return custom hash."""
return hash((self.type.value, self.path))
return hash((self.type.value, self.provider, self.path))

def __eq__(self, other: object) -> bool:
"""Check equality of two items."""
Expand Down Expand Up @@ -238,7 +238,7 @@ def update(
continue
cur_val = getattr(self, fld.name)
if isinstance(cur_val, list) and isinstance(new_val, list):
new_val = merge_lists(cur_val, new_val)
new_val = UniqueList(merge_lists(cur_val, new_val))
setattr(self, fld.name, new_val)
elif isinstance(cur_val, set) and isinstance(new_val, set | list | tuple):
cur_val.update(new_val)
Expand Down
135 changes: 83 additions & 52 deletions music_assistant/server/controllers/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ def __init__(self, *args, **kwargs) -> None:
)
self.manifest.icon = "book-information-variant"
self._scanner_task: asyncio.Task | None = None
self._online_slots_available = MAX_ONLINE_CALLS_PER_RUN

async def get_config_entries(
self,
Expand Down Expand Up @@ -232,7 +233,9 @@ def set_default_preferred_language(self, lang: str) -> None:
self.logger.warning("%s is not a valid language", lang)

@api_command("metadata/update_metadata")
async def update_metadata(self, item: str | MediaItemType, force_refresh: bool = False) -> None:
async def update_metadata(
self, item: str | MediaItemType, force_refresh: bool = False
) -> MediaItemType:
"""Get/update extra/enhanced metadata for/on given MediaItem."""
if isinstance(item, str):
item = await self.mass.music.get_item_by_uri(item)
Expand All @@ -249,6 +252,7 @@ async def update_metadata(self, item: str | MediaItemType, force_refresh: bool =
await self._update_playlist_metadata(item, force_refresh=force_refresh)
if item.media_type == MediaType.RADIO:
await self._update_radio_metadata(item, force_refresh=force_refresh)
return item

@api_command("metadata/start_scan")
def start_metadata_scanner(self) -> None:
Expand Down Expand Up @@ -415,6 +419,7 @@ async def create_collage_image(

async def _update_artist_metadata(self, artist: Artist, force_refresh: bool = False) -> None:
"""Get/update rich metadata for an artist."""
self.logger.debug("Updating metadata for Artist %s", artist.name)
unique_keys: set[str] = set()
# collect (local) metadata from all local providers
local_provs = get_global_cache_value("non_streaming_providers")
Expand All @@ -439,12 +444,9 @@ async def _update_artist_metadata(self, artist: Artist, force_refresh: bool = Fa
# to not overload the music/metadata providers with api calls
# TODO: Utilize a global (cloud) cache for metadata lookups to save on API calls

if self.config.get_value(CONF_ENABLE_ONLINE_METADATA) and (
force_refresh
or (
self._online_slots_available
and ((time() - (artist.metadata.last_refresh or 0)) > REFRESH_INTERVAL)
)
if force_refresh or (
self._online_slots_available
and ((time() - (artist.metadata.last_refresh or 0)) > REFRESH_INTERVAL)
):
self._online_slots_available -= 1
# set timestamp, used to determine when this function was last called
Expand All @@ -466,13 +468,14 @@ async def _update_artist_metadata(self, artist: Artist, force_refresh: bool = Fa
)
artist.metadata.update(prov_item.metadata)

# TODO: Use a global cache/proxy for the MB lookups to save on API calls
# The musicbrainz ID is mandatory for all metadata lookups
if not artist.mbid:
# TODO: Use a global cache/proxy for the MB lookups to save on API calls
if mbid := await self._get_artist_mbid(artist):
artist.mbid = mbid

if artist.mbid:
# The musicbrainz ID is mandatory for all metadata lookups
# collect metadata from all (online) metadata providers
if self.config.get_value(CONF_ENABLE_ONLINE_METADATA) and artist.mbid:
for provider in self.providers:
if ProviderFeature.ARTIST_METADATA not in provider.supported_features:
continue
Expand All @@ -488,6 +491,7 @@ async def _update_artist_metadata(self, artist: Artist, force_refresh: bool = Fa

async def _update_album_metadata(self, album: Album, force_refresh: bool = False) -> None:
"""Get/update rich metadata for an album."""
self.logger.debug("Updating metadata for Album %s", album.name)
unique_keys: set[str] = set()
# collect (local) metadata from all local music providers
local_provs = get_global_cache_value("non_streaming_providers")
Expand All @@ -511,13 +515,10 @@ async def _update_album_metadata(self, album: Album, force_refresh: bool = False
# NOTE: we only allow this every REFRESH_INTERVAL and a max amount of calls per day
# to not overload the (free) metadata providers with api calls
# TODO: Utilize a global (cloud) cache for metadata lookups to save on API calls
if self.config.get_value(CONF_ENABLE_ONLINE_METADATA) and (
force_refresh
or (
self._online_slots_available
and ((time() - (album.metadata.last_refresh or 0)) > REFRESH_INTERVAL)
and (album.mbid or album.artists)
)
if force_refresh or (
self._online_slots_available
and ((time() - (album.metadata.last_refresh or 0)) > REFRESH_INTERVAL)
and (album.mbid or album.artists)
):
self._online_slots_available -= 1
# set timestamp, used to determine when this function was last called
Expand All @@ -543,34 +544,33 @@ async def _update_album_metadata(self, album: Album, force_refresh: bool = False
if album.album_type == AlbumType.UNKNOWN:
album.album_type = prov_item.album_type

# collect metadata from all providers
for provider in self.providers:
if ProviderFeature.ALBUM_METADATA not in provider.supported_features:
continue
if metadata := await provider.get_album_metadata(album):
album.metadata.update(metadata)
self.logger.debug(
"Fetched metadata for Album %s on provider %s",
album.name,
provider.name,
)
# collect metadata from all (online) metadata providers
if self.config.get_value(CONF_ENABLE_ONLINE_METADATA):
for provider in self.providers:
if ProviderFeature.ALBUM_METADATA not in provider.supported_features:
continue
if metadata := await provider.get_album_metadata(album):
album.metadata.update(metadata)
self.logger.debug(
"Fetched metadata for Album %s on provider %s",
album.name,
provider.name,
)
# update final item in library database
await self.mass.music.albums.update_item_in_library(album.item_id, album)

async def _update_track_metadata(self, track: Track, force_refresh: bool = False) -> None:
"""Get/update rich metadata for a track."""
self.logger.debug("Updating metadata for Track %s", track.name)
unique_keys: set[str] = set()
# collect metadata from all (online) music/metadata providers
# NOTE: we only allow this every REFRESH_INTERVAL and a max amount of calls per day
# to not overload the (free) metadata providers with api calls
# TODO: Utilize a global (cloud) cache for metadata lookups to save on API calls
if self.config.get_value(CONF_ENABLE_ONLINE_METADATA) and (
force_refresh
or (
self._online_slots_available
and ((time() - (track.metadata.last_refresh or 0)) > REFRESH_INTERVAL)
and (track.mbid or track.artists or track.album)
)
if force_refresh or (
self._online_slots_available
and ((time() - (track.metadata.last_refresh or 0)) > REFRESH_INTERVAL)
and (track.mbid or track.artists or track.album)
):
self._online_slots_available -= 1
# set timestamp, used to determine when this function was last called
Expand All @@ -597,23 +597,25 @@ async def _update_track_metadata(self, track: Track, force_refresh: bool = False
track.metadata.update(prov_item.metadata)

# collect metadata from all metadata providers
for provider in self.providers:
if ProviderFeature.TRACK_METADATA not in provider.supported_features:
continue
if metadata := await provider.get_track_metadata(track):
track.metadata.update(metadata)
self.logger.debug(
"Fetched metadata for Track %s on provider %s",
track.name,
provider.name,
)
if self.config.get_value(CONF_ENABLE_ONLINE_METADATA):
for provider in self.providers:
if ProviderFeature.TRACK_METADATA not in provider.supported_features:
continue
if metadata := await provider.get_track_metadata(track):
track.metadata.update(metadata)
self.logger.debug(
"Fetched metadata for Track %s on provider %s",
track.name,
provider.name,
)
# update final item in library database
await self.mass.music.tracks.update_item_in_library(track.item_id, track)

async def _update_playlist_metadata(
self, playlist: Playlist, force_refresh: bool = False
) -> None:
"""Get/update rich metadata for a playlist."""
self.logger.debug("Updating metadata for Playlist %s", playlist.name)
if not force_refresh and (time() - (playlist.metadata.last_refresh or 0)) < (
60 * 60 * 24 * 5
):
Expand Down Expand Up @@ -690,21 +692,48 @@ async def _update_radio_metadata(self, radio: Radio, force_refresh: bool = False

async def _get_artist_mbid(self, artist: Artist) -> str | None:
"""Fetch musicbrainz id by performing search using the artist name, albums and tracks."""
if artist.mbid:
return artist.mbid
if compare_strings(artist.name, VARIOUS_ARTISTS_NAME):
return VARIOUS_ARTISTS_MBID

musicbrainz: MusicbrainzProvider = self.mass.get_provider("musicbrainz")
if TYPE_CHECKING:
musicbrainz = cast(MusicbrainzProvider, musicbrainz)
# first try with resource URL (e.g. streaming provider share URL)
for prov_mapping in artist.provider_mappings:
if prov_mapping.url and prov_mapping.url.startswith("http"):
if mb_artist := await musicbrainz.get_artist_details_by_resource_url(
prov_mapping.url
):
return mb_artist.id

# start lookup of musicbrainz id using artist name, albums and tracks
ref_albums = await self.mass.music.artists.albums(
artist.item_id, artist.provider, in_library_only=False
)
ref_tracks = await self.mass.music.artists.tracks(
artist.item_id, artist.provider, in_library_only=False
)
# start lookup of musicbrainz id
musicbrainz: MusicbrainzProvider = self.mass.get_provider("musicbrainz")
assert musicbrainz
if mbid := await musicbrainz.get_musicbrainz_artist_id(
artist, ref_albums=ref_albums, ref_tracks=ref_tracks
):
return mbid
# try with (strict) ref track(s), using recording id
for ref_track in ref_tracks:
if mb_artist := await musicbrainz.get_artist_details_by_track(artist.name, ref_track):
return mb_artist.id
# try with (strict) ref album(s), using releasegroup id
for ref_album in ref_albums:
if mb_artist := await musicbrainz.get_artist_details_by_album(artist.name, ref_album):
return mb_artist.id
# last resort: track matching by name
for ref_track in ref_tracks:
if not ref_track.album:
continue
if result := await musicbrainz.search(
artistname=artist.name,
albumname=ref_track.album.name,
trackname=ref_track.name,
trackversion=ref_track.version,
):
return result[0].id

# lookup failed
ref_albums_str = "/".join(x.name for x in ref_albums) or "none"
Expand All @@ -721,6 +750,7 @@ async def _get_artist_mbid(self, artist: Artist) -> str | None:

async def _metadata_scanner(self) -> None:
"""Scanner for (missing) metadata."""
self.logger.info("Starting metadata scanner")
self._online_slots_available = MAX_ONLINE_CALLS_PER_RUN
timestamp = int(time() - 60 * 60 * 24 * 30)
query = (
Expand Down Expand Up @@ -758,3 +788,4 @@ async def _metadata_scanner(self) -> None:
limit=2500, order_by="random", extra_query=query
):
await self._update_track_metadata(track)
self.logger.info("Metadata scanner finished.")
2 changes: 2 additions & 0 deletions music_assistant/server/controllers/music.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,7 @@ async def remove_item_from_library(
Destructive! Will remove the item and all dependants.
"""
self.mass.metadata.stop_metadata_scanner()
ctrl = self.get_controller(media_type)
item = await ctrl.get_library_item(library_item_id)
# remove from all providers
Expand Down Expand Up @@ -810,6 +811,7 @@ def on_sync_task_done(task: asyncio.Task) -> None:

async def cleanup_provider(self, provider_instance: str) -> None:
"""Cleanup provider records from the database."""
self.mass.metadata.stop_metadata_scanner()
if provider_instance.startswith(("filesystem", "jellyfin", "plex", "opensubsonic")):
# removal of a local provider can become messy very fast due to the relations
# such as images pointing at the files etc. so we just reset the whole db
Expand Down
4 changes: 2 additions & 2 deletions music_assistant/server/models/core_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,13 @@ async def reload(self, config: CoreConfig | None = None) -> None:

def _set_logger(self, log_level: str | None = None) -> None:
"""Set the logger settings."""
self.logger = logging.getLogger(f"{MASS_LOGGER_NAME}.{self.domain}")
mass_logger = logging.getLogger(MASS_LOGGER_NAME)
self.logger = mass_logger.getChild(self.domain)
if log_level is None:
log_level = self.mass.config.get_raw_core_config_value(
self.domain, CONF_LOG_LEVEL, "GLOBAL"
)
if log_level == "GLOBAL":
mass_logger = logging.getLogger(MASS_LOGGER_NAME)
self.logger.setLevel(mass_logger.level)
else:
self.logger.setLevel(log_level)
Expand Down
6 changes: 4 additions & 2 deletions music_assistant/server/models/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,13 @@ def __init__(
self.mass = mass
self.manifest = manifest
self.config = config
self.logger = logging.getLogger(f"{MASS_LOGGER_NAME}.providers.{self.domain}")
mass_logger = logging.getLogger(MASS_LOGGER_NAME)
self.logger = mass_logger.getChild(self.domain)
log_level = config.get_value(CONF_LOG_LEVEL)
if log_level == "GLOBAL":
mass_logger = logging.getLogger(MASS_LOGGER_NAME)
self.logger.setLevel(mass_logger.level)
else:
self.logger.setLevel(log_level)
if logging.getLogger().level > self.logger.level:
# if the root logger's level is higher, we need to adjust that too
logging.getLogger().setLevel(self.logger.level)
Expand Down
Loading

0 comments on commit d7b47eb

Please sign in to comment.