From 834c512e81a94f046cd4b30f8a1cd985440e1e34 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Thu, 25 Apr 2024 14:26:45 +0200 Subject: [PATCH 01/14] Add Dataset.example_media --- audbcards/core/datacard.py | 203 ++++++++++-------- audbcards/core/dataset.py | 34 +++ audbcards/core/templates/datacard_example.j2 | 4 +- .../rendered_templates/medium_db.rst | 2 +- tests/test_datacard.py | 50 +---- tests/test_dataset.py | 33 +++ 6 files changed, 188 insertions(+), 138 deletions(-) diff --git a/audbcards/core/datacard.py b/audbcards/core/datacard.py index b89f7c8..725f936 100644 --- a/audbcards/core/datacard.py +++ b/audbcards/core/datacard.py @@ -13,6 +13,7 @@ import audiofile import audplot +from audbcards.core.config import config from audbcards.core.dataset import Dataset from audbcards.core.utils import set_plot_margins @@ -49,6 +50,11 @@ class Datacard(object): will store a wavplot of the example audio file under ``///.png`` + cache_root: cache folder. + If ``None``, + the environmental variable ``AUDBCARDS_CACHE_ROOT``, + or :attr:`audbcards.config.CACHE_ROOT` + is used """ @@ -60,6 +66,7 @@ def __init__( example: bool = True, sphinx_build_dir: str = None, sphinx_src_dir: str = None, + cache_root: str = None, ): self.dataset = dataset """Dataset object.""" @@ -76,6 +83,11 @@ def __init__( self.sphinx_src_dir = sphinx_src_dir """Sphinx source dir.""" + if cache_root is None: + cache_root = os.environ.get("AUDBCARDS_CACHE_ROOT") or config.CACHE_ROOT + self.cache_root = audeer.mkdir(cache_root) + r"""Cache root folder.""" + self.rst_preamble = "" """RST code added at top of data card.""" @@ -84,47 +96,6 @@ def content(self): """Property Accessor for rendered jinja2 content.""" return self._render_template() - @property - def example_media(self) -> typing.Optional[str]: - r"""Select example media file. - - This select a media file - based on the median duration - of all files - between 0.5 s and 300 s - and downloads it to the cache. - - """ - # Pick a meaningful duration for the example audio file - min_dur = 0.5 - max_dur = 300 # 5 min - durations = self.dataset.file_durations - selected_durations = [d for d in durations if d >= min_dur and d <= max_dur] - if len(selected_durations) == 0: - return None - selected_duration = np.median(selected_durations) - - # Get index for duration closest to selected duration - # see https://stackoverflow.com/a/9706105 - # durations.index(selected_duration) - # is an alternative but fails due to rounding errors - index = min( - range(len(durations)), - key=lambda n: abs(durations[n] - selected_duration), - ) - # Download of example data might fail - try: - media = self.dataset.deps.media[index] - audb.load_media( - self.dataset.name, - media, - version=self.dataset.version, - verbose=False, - ) - except: # noqa: E722 - media = None - return media - @property def file_duration_distribution(self) -> str: r"""Minimum and maximum of files durations, and plotted distribution. @@ -161,83 +132,131 @@ def file_duration_distribution(self) -> str: # Save distribution plot if self.sphinx_src_dir is not None: - self._plot_distribution(durations) - name = "file-durations" + file_name = f"{self.dataset.name}-{self.dataset.version}-file-durations.png" + + # Plot distribution to cache, + # if not found there already. + # Cache is organized as `///` + cache_file = audeer.path( + self.cache_root, + self.dataset.name, + self.dataset.version, + file_name, + ) + if not os.path.exists(cache_file): + audeer.mkdir(os.path.dirname(cache_file)) + self._plot_distribution(durations) + plt.savefig(cache_file, transparent=True) + plt.close() + image_file = audeer.path( self.sphinx_src_dir, self.path, self.dataset.name, - f"{self.dataset.name}-{name}.png", + file_name, ) audeer.mkdir(os.path.dirname(image_file)) - plt.savefig(image_file, transparent=True) - plt.close() + shutil.copyfile(cache_file, image_file) distribution_str = self._inline_image( f"{min_:.1f} {unit}", - f"./{self.dataset.name}/{self.dataset.name}-{name}.png", + f"./{self.dataset.name}/{file_name}", f"{max_:.1f} {unit}", ) return distribution_str - def player( - self, - file: str = None, - ) -> str: + def player(self) -> str: r"""Create an audio player showing the waveform. - Args: - file: input audio file to be used in the player. - If ``None``, - :attr:`audbcards.Datacard.example_media` - is used + Returns: + String containing RST code to include the player """ - if file is None: - file = self.example_media - # use audb cache instead of dataset.cache_root - media_src_dir = ( - f"{audb.default_cache_root()}/" - f"{audb.flavor_path(self.dataset.name, self.dataset.version)}" + def load_media_to_cache(cache_path): + # Path to corresponding media files in audb + media_src_dir = ( + f"{audb.default_cache_root()}/" + f"{audb.flavor_path(self.dataset.name, self.dataset.version)}" + ) + try: + audb.load_media( + self.dataset.name, + self.dataset.example_media, + version=self.dataset.version, + verbose=False, + ) + except: # noqa: E722 + return False + audeer.mkdir(os.path.dirname(cache_path)) + shutil.copy( + os.path.join(media_src_dir, self.dataset.example_media), + cache_path, + ) + return True + + # Cache is organized as `///` + cache_folder = audeer.path( + self.cache_root, + self.dataset.name, + self.dataset.version, + ) + cache_media_file = audeer.path( + cache_folder, + "media-example", + self.dataset.example_media, ) + plot_file_name = f"{self.dataset.name}-{self.dataset.version}-player.png" + cache_plot_file = audeer.path(cache_folder, plot_file_name) - # Move file to build folder - if self.sphinx_build_dir is not None: - media_dst_dir = audeer.path( - self.sphinx_build_dir, + # Add plot of waveform + if self.sphinx_src_dir is not None: + if not os.path.exists(cache_media_file): + load_media_to_cache(cache_media_file) + + if not os.path.exists(cache_plot_file): + signal, sampling_rate = audiofile.read( + cache_media_file, + always_2d=True, + ) + audeer.mkdir(os.path.dirname(cache_plot_file)) + plt.figure(figsize=[3, 0.5]) + ax = plt.subplot(111) + audplot.waveform(signal[0, :], ax=ax) + set_plot_margins() + plt.savefig(cache_plot_file) + plt.close() + + plot_dst_dir = audeer.path( + self.sphinx_src_dir, self.path, self.dataset.name, ) - audeer.mkdir(os.path.join(media_dst_dir, os.path.dirname(file))) + audeer.mkdir(plot_dst_dir) shutil.copy( - os.path.join(media_src_dir, file), - os.path.join(media_dst_dir, file), + cache_plot_file, + os.path.join(plot_dst_dir, plot_file_name), ) - # Add plot of waveform - if self.sphinx_src_dir is not None: - signal, sampling_rate = audiofile.read( - os.path.join(media_src_dir, file), - always_2d=True, - ) - image_file = audeer.path( - self.sphinx_src_dir, + # Copy media file to build folder + if self.sphinx_build_dir is not None: + if not os.path.exists(cache_media_file): + load_media_to_cache(cache_media_file) + + media_dst_dir = audeer.path( + self.sphinx_build_dir, self.path, self.dataset.name, - f"{self.dataset.name}.png", ) - audeer.mkdir(os.path.dirname(image_file)) - plt.figure(figsize=[3, 0.5]) - ax = plt.subplot(111) - audplot.waveform(signal[0, :], ax=ax) - set_plot_margins() - plt.savefig(image_file) - plt.close() - - player_src = f"./{self.dataset.name}/{file}" + audeer.mkdir(media_dst_dir, os.path.dirname(self.dataset.example_media)) + shutil.copy( + cache_media_file, + os.path.join(media_dst_dir, self.dataset.example_media), + ) + + player_src = f"./{self.dataset.name}/{self.dataset.example_media}" player_str = ( - f".. image:: ./{self.dataset.name}/{self.dataset.name}.png\n" + f".. image:: ./{self.dataset.name}/{plot_file_name}\n" "\n" ".. raw:: html\n" "\n" @@ -365,14 +384,10 @@ def _expand_dataset( """ # Add path of datacard folder dataset["path"] = self.path - # Add audio player for example file - dataset["example"] = None if self.example: - example = self.example_media - if example is not None: - player = self.player(example) + if self.dataset.example_media is not None: + player = self.player() dataset["player"] = player - dataset["example"] = example dataset["file_duration_distribution"] = self.file_duration_distribution return dataset diff --git a/audbcards/core/dataset.py b/audbcards/core/dataset.py index e2652b3..e5d5d21 100644 --- a/audbcards/core/dataset.py +++ b/audbcards/core/dataset.py @@ -5,6 +5,7 @@ import typing import jinja2 +import numpy as np import pandas as pd import audb @@ -189,6 +190,39 @@ def duration(self) -> pd.Timedelta: unit="s", ) + @functools.cached_property + def example_media(self) -> typing.Optional[str]: + r"""Example media file. + + The media file is selected + by its median duration + from all files in the dataset + with a duration + between 0.5 s and 300 s. + If no media file meets this criterium, + ``None`` is returned instead. + + """ + # Pick a meaningful duration for the example audio file + min_dur = 0.5 + max_dur = 300 # 5 min + durations = self.file_durations + selected_durations = [d for d in durations if d >= min_dur and d <= max_dur] + + if len(selected_durations) == 0: + return None + + selected_duration = np.median(selected_durations) + # Get index for duration closest to selected duration + # see https://stackoverflow.com/a/9706105 + # durations.index(selected_duration) + # is an alternative but fails due to rounding errors + index = min( + range(len(durations)), + key=lambda n: abs(durations[n] - selected_duration), + ) + return self.deps.media[index] + @functools.cached_property def files(self) -> int: r"""Number of media files in dataset.""" diff --git a/audbcards/core/templates/datacard_example.j2 b/audbcards/core/templates/datacard_example.j2 index 4989120..672b60c 100644 --- a/audbcards/core/templates/datacard_example.j2 +++ b/audbcards/core/templates/datacard_example.j2 @@ -1,10 +1,10 @@ -{% if example is not none %} +{% if example_media is not none %} Example {% for n in range("Example"|length) %}^{% endfor %} -:file:`{{ example }}` +:file:`{{ example_media }}` {{ player }} {% endif %} diff --git a/tests/test_data/rendered_templates/medium_db.rst b/tests/test_data/rendered_templates/medium_db.rst index 894f01b..a560323 100644 --- a/tests/test_data/rendered_templates/medium_db.rst +++ b/tests/test_data/rendered_templates/medium_db.rst @@ -31,7 +31,7 @@ Example :file:`data/f0.wav` -.. image:: ./medium_db/medium_db.png +.. image:: ./medium_db/medium_db-1.0.0-player.png .. raw:: html diff --git a/tests/test_datacard.py b/tests/test_datacard.py index 098626b..3eb24b9 100644 --- a/tests/test_datacard.py +++ b/tests/test_datacard.py @@ -1,9 +1,7 @@ import os -import posixpath import re import matplotlib.pyplot as plt -import numpy as np import pytest import audeer @@ -44,38 +42,6 @@ def test_datacard(db, cache, request): assert content == expected_content -@pytest.mark.parametrize( - "db", - [ - "medium_db", - ], -) -def test_datacard_example_media(db, cache, request): - r"""Test Datacard.example_media. - - It checks that the desired audio file - is selected as example. - - """ - db = request.getfixturevalue(db) - dataset = audbcards.Dataset(db.name, pytest.VERSION, cache_root=cache) - datacard = audbcards.Datacard(dataset) - - # Relative path to audio file from database - # as written in the dependencies table, - # for example data/file.wav - durations = [d.total_seconds() for d in db.files_duration(db.files)] - median_duration = np.median([d for d in durations if 0.5 < d < 300]) - expected_example_index = min( - range(len(durations)), key=lambda n: abs(durations[n] - median_duration) - ) - expected_example = audeer.path(db.files[expected_example_index]).replace( - os.sep, posixpath.sep - ) - expected_example = "/".join(expected_example.split("/")[-2:]) - assert datacard.example_media == expected_example - - @pytest.mark.parametrize( "db, expected_min, expected_max", [ @@ -113,7 +79,7 @@ def test_datacard_file_duration_distribution( build_dir, datacard.path, db.name, - f"{db.name}-file-durations.png", + f"{db.name}-{pytest.VERSION}-file-durations.png", ) assert not os.path.exists(image_file) if expected_min == expected_max: @@ -129,7 +95,9 @@ def test_datacard_file_duration_distribution( if expected_min != expected_max: assert os.path.exists(image_file) expected_distribution_str = ( - f"{expected_min:.1f} s |{db.name}-file-durations| {expected_max:.1f} s" + f"{expected_min:.1f} s " + f"|{db.name}-{pytest.VERSION}-file-durations| " + f"{expected_max:.1f} s" ) assert expected_distribution_str == distribution_str @@ -164,13 +132,13 @@ def test_datacard_player(tmpdir, db, cache, request): build_dir, datacard.path, db.name, - datacard.example_media, + datacard.dataset.example_media, ) image_file = audeer.path( src_dir, datacard.path, db.name, - f"{db.name}.png", + f"{db.name}-{pytest.VERSION}-player.png", ) assert not os.path.exists(media_file) assert not os.path.exists(image_file) @@ -178,7 +146,7 @@ def test_datacard_player(tmpdir, db, cache, request): # Set sphinx src and build dir and execute again datacard.sphinx_build_dir = build_dir datacard.sphinx_src_dir = src_dir - player_str = datacard.player(datacard.example_media) + player_str = datacard.player() assert os.path.exists(media_file) assert os.path.exists(image_file) @@ -201,11 +169,11 @@ def test_datacard_player(tmpdir, db, cache, request): # Append audio to the expected player_str expected_player_str = ( - f".. image:: ./{db.name}/{db.name}.png\n" + f".. image:: ./{db.name}/{db.name}-{pytest.VERSION}-player.png\n" "\n" ".. raw:: html\n" "\n" - f'

" ) # Check if the generated player_str and the expected matches diff --git a/tests/test_dataset.py b/tests/test_dataset.py index fe0f0d0..d099dc2 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -1,5 +1,7 @@ import os +import posixpath +import numpy as np import pandas as pd import pytest @@ -258,6 +260,37 @@ def test_iso_language_property(dbs, cache, request): _ = [dataset.iso_languages for dataset in datasets] +@pytest.mark.parametrize( + "db", + [ + "medium_db", + ], +) +def test_dataset_example_media(db, cache, request): + r"""Test Dataset.example_media. + + It checks that the desired audio file + is selected as example. + + """ + db = request.getfixturevalue(db) + dataset = audbcards.Dataset(db.name, pytest.VERSION, cache_root=cache) + + # Relative path to audio file from database + # as written in the dependencies table, + # for example data/file.wav + durations = [d.total_seconds() for d in db.files_duration(db.files)] + median_duration = np.median([d for d in durations if 0.5 < d < 300]) + expected_example_index = min( + range(len(durations)), key=lambda n: abs(durations[n] - median_duration) + ) + expected_example = audeer.path(db.files[expected_example_index]).replace( + os.sep, posixpath.sep + ) + expected_example = "/".join(expected_example.split("/")[-2:]) + assert dataset.example_media == expected_example + + @pytest.fixture def constructor(tmpdir, medium_db, request): """Fixture to test Dataset constructor.""" From b07b21522ba69966d468097da337e1545c8f10ab Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 30 Apr 2024 13:14:22 +0200 Subject: [PATCH 02/14] Rename cache files for Datacard.player() --- audbcards/core/datacard.py | 95 +++++++++++-------- .../rendered_templates/medium_db.rst | 2 +- tests/test_datacard.py | 4 +- 3 files changed, 59 insertions(+), 42 deletions(-) diff --git a/audbcards/core/datacard.py b/audbcards/core/datacard.py index 725f936..7705fe3 100644 --- a/audbcards/core/datacard.py +++ b/audbcards/core/datacard.py @@ -168,63 +168,79 @@ def file_duration_distribution(self) -> str: def player(self) -> str: r"""Create an audio player showing the waveform. + If :attr:`audbcards.Datacard.sphinx_build_dir` + or :attr:`audbcards.Datacard.sphinx_src_dir` + are not ``None``, + an example media file is cached in the folder + ``${cache_root}/{name}-{version}-player-media/``, + using the same sub-folder structure + as the media file has inside its dataset. + + If :attr:`audbcards.Datacard.sphinx_src_dir` is not ``None``, + an plot of the waveform of the media file + is cached at + ``${cache_root}/{name}-{version}-player-waveform.png``. + Returns: String containing RST code to include the player """ - - def load_media_to_cache(cache_path): - # Path to corresponding media files in audb - media_src_dir = ( - f"{audb.default_cache_root()}/" - f"{audb.flavor_path(self.dataset.name, self.dataset.version)}" - ) - try: - audb.load_media( - self.dataset.name, - self.dataset.example_media, - version=self.dataset.version, - verbose=False, - ) - except: # noqa: E722 - return False - audeer.mkdir(os.path.dirname(cache_path)) - shutil.copy( - os.path.join(media_src_dir, self.dataset.example_media), - cache_path, - ) - return True - # Cache is organized as `///` cache_folder = audeer.path( self.cache_root, self.dataset.name, self.dataset.version, ) - cache_media_file = audeer.path( + cache_example_media = audeer.path( cache_folder, - "media-example", + "player-media", self.dataset.example_media, ) - plot_file_name = f"{self.dataset.name}-{self.dataset.version}-player.png" - cache_plot_file = audeer.path(cache_folder, plot_file_name) + cache_waveform_file = audeer.path( + cache_folder, + f"{self.dataset.name}-{self.dataset.version}-player-waveform.png", + ) + + def load_media_to_cache(cache_example_media: str): + r"""Load media file with audb and copy to audbcards cache. + + Args: + cache_example_media: full path to media file in cache + + """ + # Path to corresponding media files in audb + media_src_dir = ( + f"{audb.default_cache_root()}/" + f"{audb.flavor_path(self.dataset.name, self.dataset.version)}" + ) + audb.load_media( + self.dataset.name, + self.dataset.example_media, + version=self.dataset.version, + verbose=False, + ) + audeer.mkdir(os.path.dirname(cache_example_media)) + shutil.copy( + os.path.join(media_src_dir, self.dataset.example_media), + cache_example_media, + ) # Add plot of waveform if self.sphinx_src_dir is not None: - if not os.path.exists(cache_media_file): - load_media_to_cache(cache_media_file) + if not os.path.exists(cache_example_media): + load_media_to_cache(cache_example_media) - if not os.path.exists(cache_plot_file): + if not os.path.exists(cache_waveform_file): signal, sampling_rate = audiofile.read( - cache_media_file, + cache_example_media, always_2d=True, ) - audeer.mkdir(os.path.dirname(cache_plot_file)) + audeer.mkdir(os.path.dirname(cache_waveform_file)) plt.figure(figsize=[3, 0.5]) ax = plt.subplot(111) audplot.waveform(signal[0, :], ax=ax) set_plot_margins() - plt.savefig(cache_plot_file) + plt.savefig(cache_waveform_file) plt.close() plot_dst_dir = audeer.path( @@ -234,14 +250,14 @@ def load_media_to_cache(cache_path): ) audeer.mkdir(plot_dst_dir) shutil.copy( - cache_plot_file, - os.path.join(plot_dst_dir, plot_file_name), + cache_waveform_file, + os.path.join(plot_dst_dir, os.path.basename(cache_waveform_file)), ) # Copy media file to build folder if self.sphinx_build_dir is not None: - if not os.path.exists(cache_media_file): - load_media_to_cache(cache_media_file) + if not os.path.exists(cache_example_media): + load_media_to_cache(cache_example_media) media_dst_dir = audeer.path( self.sphinx_build_dir, @@ -250,13 +266,14 @@ def load_media_to_cache(cache_path): ) audeer.mkdir(media_dst_dir, os.path.dirname(self.dataset.example_media)) shutil.copy( - cache_media_file, + cache_example_media, os.path.join(media_dst_dir, self.dataset.example_media), ) + waveform_src = f"./{self.dataset.name}/{os.path.basename(cache_waveform_file)}" player_src = f"./{self.dataset.name}/{self.dataset.example_media}" player_str = ( - f".. image:: ./{self.dataset.name}/{plot_file_name}\n" + f".. image:: {waveform_src}\n" "\n" ".. raw:: html\n" "\n" diff --git a/tests/test_data/rendered_templates/medium_db.rst b/tests/test_data/rendered_templates/medium_db.rst index a560323..0e2a0d6 100644 --- a/tests/test_data/rendered_templates/medium_db.rst +++ b/tests/test_data/rendered_templates/medium_db.rst @@ -31,7 +31,7 @@ Example :file:`data/f0.wav` -.. image:: ./medium_db/medium_db-1.0.0-player.png +.. image:: ./medium_db/medium_db-1.0.0-player-waveform.png .. raw:: html diff --git a/tests/test_datacard.py b/tests/test_datacard.py index 3eb24b9..d7cb60b 100644 --- a/tests/test_datacard.py +++ b/tests/test_datacard.py @@ -138,7 +138,7 @@ def test_datacard_player(tmpdir, db, cache, request): src_dir, datacard.path, db.name, - f"{db.name}-{pytest.VERSION}-player.png", + f"{db.name}-{pytest.VERSION}-player-waveform.png", ) assert not os.path.exists(media_file) assert not os.path.exists(image_file) @@ -169,7 +169,7 @@ def test_datacard_player(tmpdir, db, cache, request): # Append audio to the expected player_str expected_player_str = ( - f".. image:: ./{db.name}/{db.name}-{pytest.VERSION}-player.png\n" + f".. image:: ./{db.name}/{db.name}-{pytest.VERSION}-player-waveform.png\n" "\n" ".. raw:: html\n" "\n" From 11ff610f1f9d05830d509404dbd77825fdd2be99 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 30 Apr 2024 13:25:36 +0200 Subject: [PATCH 03/14] Rename cache for Datacard.plot_file_distribution() --- audbcards/core/datacard.py | 36 +++++++++++++++++++++--------------- tests/test_datacard.py | 4 ++-- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/audbcards/core/datacard.py b/audbcards/core/datacard.py index 7705fe3..abbc379 100644 --- a/audbcards/core/datacard.py +++ b/audbcards/core/datacard.py @@ -104,17 +104,32 @@ def file_duration_distribution(self) -> str: containing the mininimum and maximum values of files durations. - If :attr:`audbcards.Datacard.sphinx_src_dir` is set + If :attr:`audbcards.Datacard.sphinx_src_dir` is not ``None`` (e.g. when used in the sphinx extension), - an inline image is stored - in the sphinx source folder - under ``/-file-durations.png`` - and displayed + an image is stored in the file + ``--file-duration-distribution.png``, + which is cached in + ``///`` + and copied to the sphinx source folder + under + ``/``. + The image is displayed inline between the minimum and maximum values. If all duration values are the same, no distribution plot is created. - """ + """ # noqa: E501 + file_name = ( + f"{self.dataset.name}-{self.dataset.version}-file-duration-distribution.png" + ) + # Cache is organized as `///` + cache_file = audeer.path( + self.cache_root, + self.dataset.name, + self.dataset.version, + file_name, + ) + min_ = 0 max_ = 0 unit = "s" @@ -132,17 +147,8 @@ def file_duration_distribution(self) -> str: # Save distribution plot if self.sphinx_src_dir is not None: - file_name = f"{self.dataset.name}-{self.dataset.version}-file-durations.png" - # Plot distribution to cache, # if not found there already. - # Cache is organized as `///` - cache_file = audeer.path( - self.cache_root, - self.dataset.name, - self.dataset.version, - file_name, - ) if not os.path.exists(cache_file): audeer.mkdir(os.path.dirname(cache_file)) self._plot_distribution(durations) diff --git a/tests/test_datacard.py b/tests/test_datacard.py index d7cb60b..f5adbcb 100644 --- a/tests/test_datacard.py +++ b/tests/test_datacard.py @@ -79,7 +79,7 @@ def test_datacard_file_duration_distribution( build_dir, datacard.path, db.name, - f"{db.name}-{pytest.VERSION}-file-durations.png", + f"{db.name}-{pytest.VERSION}-file-duration-distribution.png", ) assert not os.path.exists(image_file) if expected_min == expected_max: @@ -96,7 +96,7 @@ def test_datacard_file_duration_distribution( assert os.path.exists(image_file) expected_distribution_str = ( f"{expected_min:.1f} s " - f"|{db.name}-{pytest.VERSION}-file-durations| " + f"|{db.name}-{pytest.VERSION}-file-duration-distribution| " f"{expected_max:.1f} s" ) assert expected_distribution_str == distribution_str From 78f436c58ddf6fba4ce2af4ec0d01e07a4c7770e Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 30 Apr 2024 13:35:43 +0200 Subject: [PATCH 04/14] Update dostring --- audbcards/core/datacard.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/audbcards/core/datacard.py b/audbcards/core/datacard.py index abbc379..65d76a9 100644 --- a/audbcards/core/datacard.py +++ b/audbcards/core/datacard.py @@ -112,7 +112,7 @@ def file_duration_distribution(self) -> str: ``///`` and copied to the sphinx source folder under - ``/``. + ``//``. The image is displayed inline between the minimum and maximum values. If all duration values are the same, @@ -178,14 +178,26 @@ def player(self) -> str: or :attr:`audbcards.Datacard.sphinx_src_dir` are not ``None``, an example media file is cached in the folder - ``${cache_root}/{name}-{version}-player-media/``, + ``--player-media/`` + inside + ``///``, using the same sub-folder structure as the media file has inside its dataset. + If :attr:`audbcards.Datacard.sphinx_build_dir` + is not ``None``, + the media sub-folder structure + is also copied + to the sphinx build dir under + ``///``. If :attr:`audbcards.Datacard.sphinx_src_dir` is not ``None``, an plot of the waveform of the media file - is cached at - ``${cache_root}/{name}-{version}-player-waveform.png``. + is cached under + ``--player-waveform.png`` + inside + ``///``. + It is also copied to the sphinx source folder into + ``///``. Returns: String containing RST code to include the player From a9af6bea2d05bd05e2dca31e084e1b5775cd0e07 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 30 Apr 2024 13:38:00 +0200 Subject: [PATCH 05/14] Fix typo --- audbcards/core/datacard.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/audbcards/core/datacard.py b/audbcards/core/datacard.py index 65d76a9..98882e2 100644 --- a/audbcards/core/datacard.py +++ b/audbcards/core/datacard.py @@ -176,7 +176,7 @@ def player(self) -> str: If :attr:`audbcards.Datacard.sphinx_build_dir` or :attr:`audbcards.Datacard.sphinx_src_dir` - are not ``None``, + is not ``None``, an example media file is cached in the folder ``--player-media/`` inside From 085de2b0d4b32d35b0c8db366bbffea099e81cba Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 30 Apr 2024 13:39:43 +0200 Subject: [PATCH 06/14] Fix typos --- audbcards/core/datacard.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/audbcards/core/datacard.py b/audbcards/core/datacard.py index 98882e2..88a7bfa 100644 --- a/audbcards/core/datacard.py +++ b/audbcards/core/datacard.py @@ -111,7 +111,7 @@ def file_duration_distribution(self) -> str: which is cached in ``///`` and copied to the sphinx source folder - under + into ``//``. The image is displayed inline between the minimum and maximum values. @@ -187,11 +187,11 @@ def player(self) -> str: is not ``None``, the media sub-folder structure is also copied - to the sphinx build dir under + to the sphinx build dir into ``///``. If :attr:`audbcards.Datacard.sphinx_src_dir` is not ``None``, - an plot of the waveform of the media file + a plot of the waveform of the media file is cached under ``--player-waveform.png`` inside From 6358c5e362bdaf88eb32305fd3c6f5ac098ca12b Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 30 Apr 2024 13:50:25 +0200 Subject: [PATCH 07/14] Adjust folder names in docstrings --- audbcards/core/datacard.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/audbcards/core/datacard.py b/audbcards/core/datacard.py index 88a7bfa..c54ea14 100644 --- a/audbcards/core/datacard.py +++ b/audbcards/core/datacard.py @@ -42,14 +42,14 @@ class Datacard(object): a call to :meth:`audbcards.Datacard.player` will store an example audio file under - ``///`` + ``///`` sphinx_src_dir: source dir of sphinx. If not ``None`` and ``example`` is ``True``, a call to :meth:`audbcards.Datacard.player` - will store a wavplot of the example audio file + will store a wavform plot of the example audio file under - ``///.png`` + ``///`` cache_root: cache folder. If ``None``, the environmental variable ``AUDBCARDS_CACHE_ROOT``, @@ -112,7 +112,7 @@ def file_duration_distribution(self) -> str: ``///`` and copied to the sphinx source folder into - ``//``. + ``//``. The image is displayed inline between the minimum and maximum values. If all duration values are the same, @@ -188,7 +188,7 @@ def player(self) -> str: the media sub-folder structure is also copied to the sphinx build dir into - ``///``. + ``///``. If :attr:`audbcards.Datacard.sphinx_src_dir` is not ``None``, a plot of the waveform of the media file @@ -197,7 +197,7 @@ def player(self) -> str: inside ``///``. It is also copied to the sphinx source folder into - ``///``. + ``///``. Returns: String containing RST code to include the player From 6c6e09d6ad73639e5a5b7e328628191141ba5018 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 30 Apr 2024 13:55:03 +0200 Subject: [PATCH 08/14] Remove noqa: E501 from docstring --- audbcards/core/datacard.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/audbcards/core/datacard.py b/audbcards/core/datacard.py index c54ea14..25ecccc 100644 --- a/audbcards/core/datacard.py +++ b/audbcards/core/datacard.py @@ -118,7 +118,7 @@ def file_duration_distribution(self) -> str: If all duration values are the same, no distribution plot is created. - """ # noqa: E501 + """ file_name = ( f"{self.dataset.name}-{self.dataset.version}-file-duration-distribution.png" ) From 094317981b360ee05368f6cc2a41f1cb5f759918 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 30 Apr 2024 14:02:16 +0200 Subject: [PATCH 09/14] Fix location of media cache --- audbcards/core/datacard.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/audbcards/core/datacard.py b/audbcards/core/datacard.py index 25ecccc..2b0f68d 100644 --- a/audbcards/core/datacard.py +++ b/audbcards/core/datacard.py @@ -211,7 +211,7 @@ def player(self) -> str: ) cache_example_media = audeer.path( cache_folder, - "player-media", + f"{self.dataset.name}-{self.dataset.version}-player-media", self.dataset.example_media, ) cache_waveform_file = audeer.path( From e1957e17e55c50b1ee53824b08ed5d99873538b5 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Thu, 2 May 2024 11:47:23 +0200 Subject: [PATCH 10/14] Make example media path deterministic --- audbcards/core/datacard.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/audbcards/core/datacard.py b/audbcards/core/datacard.py index 2b0f68d..de4523e 100644 --- a/audbcards/core/datacard.py +++ b/audbcards/core/datacard.py @@ -226,22 +226,14 @@ def load_media_to_cache(cache_example_media: str): cache_example_media: full path to media file in cache """ - # Path to corresponding media files in audb - media_src_dir = ( - f"{audb.default_cache_root()}/" - f"{audb.flavor_path(self.dataset.name, self.dataset.version)}" - ) - audb.load_media( + media = audb.load_media( self.dataset.name, self.dataset.example_media, version=self.dataset.version, verbose=False, - ) + )[0] audeer.mkdir(os.path.dirname(cache_example_media)) - shutil.copy( - os.path.join(media_src_dir, self.dataset.example_media), - cache_example_media, - ) + shutil.copy(media, cache_example_media) # Add plot of waveform if self.sphinx_src_dir is not None: From 510f4d5dcd5f6618a8e15c31478e97ed2c348ccb Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 8 May 2024 11:23:17 +0200 Subject: [PATCH 11/14] FIX: wavform => waveform --- audbcards/core/datacard.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/audbcards/core/datacard.py b/audbcards/core/datacard.py index de4523e..0fb4800 100644 --- a/audbcards/core/datacard.py +++ b/audbcards/core/datacard.py @@ -47,7 +47,7 @@ class Datacard(object): If not ``None`` and ``example`` is ``True``, a call to :meth:`audbcards.Datacard.player` - will store a wavform plot of the example audio file + will store a waveform plot of the example audio file under ``///`` cache_root: cache folder. From 1b1be144902bf39d2665bb002f2473bd7651e857 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 8 May 2024 11:24:56 +0200 Subject: [PATCH 12/14] Move cache_file definitions down --- audbcards/core/datacard.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/audbcards/core/datacard.py b/audbcards/core/datacard.py index 0fb4800..22de60e 100644 --- a/audbcards/core/datacard.py +++ b/audbcards/core/datacard.py @@ -203,21 +203,6 @@ def player(self) -> str: String containing RST code to include the player """ - # Cache is organized as `///` - cache_folder = audeer.path( - self.cache_root, - self.dataset.name, - self.dataset.version, - ) - cache_example_media = audeer.path( - cache_folder, - f"{self.dataset.name}-{self.dataset.version}-player-media", - self.dataset.example_media, - ) - cache_waveform_file = audeer.path( - cache_folder, - f"{self.dataset.name}-{self.dataset.version}-player-waveform.png", - ) def load_media_to_cache(cache_example_media: str): r"""Load media file with audb and copy to audbcards cache. @@ -235,6 +220,22 @@ def load_media_to_cache(cache_example_media: str): audeer.mkdir(os.path.dirname(cache_example_media)) shutil.copy(media, cache_example_media) + # Cache is organized as `///` + cache_folder = audeer.path( + self.cache_root, + self.dataset.name, + self.dataset.version, + ) + cache_example_media = audeer.path( + cache_folder, + f"{self.dataset.name}-{self.dataset.version}-player-media", + self.dataset.example_media, + ) + cache_waveform_file = audeer.path( + cache_folder, + f"{self.dataset.name}-{self.dataset.version}-player-waveform.png", + ) + # Add plot of waveform if self.sphinx_src_dir is not None: if not os.path.exists(cache_example_media): From 3c89558a95e4ed699a4a897ac667de9d8118fb08 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 8 May 2024 11:29:31 +0200 Subject: [PATCH 13/14] Add extra function for plotting waveform --- audbcards/core/datacard.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/audbcards/core/datacard.py b/audbcards/core/datacard.py index 22de60e..8af3dc6 100644 --- a/audbcards/core/datacard.py +++ b/audbcards/core/datacard.py @@ -220,6 +220,26 @@ def load_media_to_cache(cache_example_media: str): audeer.mkdir(os.path.dirname(cache_example_media)) shutil.copy(media, cache_example_media) + def plot_waveform_to_cache(cache_example_media: str, cache_waveform_file: str): + r"""Plot waveform of example media to cache. + + Args: + cache_example_media: full path to media file in cache + cache_waveform_file: full path to waveform file in cache + + """ + signal, sampling_rate = audiofile.read( + cache_example_media, + always_2d=True, + ) + audeer.mkdir(os.path.dirname(cache_waveform_file)) + plt.figure(figsize=[3, 0.5]) + ax = plt.subplot(111) + audplot.waveform(signal[0, :], ax=ax) + set_plot_margins() + plt.savefig(cache_waveform_file) + plt.close() + # Cache is organized as `///` cache_folder = audeer.path( self.cache_root, @@ -242,17 +262,7 @@ def load_media_to_cache(cache_example_media: str): load_media_to_cache(cache_example_media) if not os.path.exists(cache_waveform_file): - signal, sampling_rate = audiofile.read( - cache_example_media, - always_2d=True, - ) - audeer.mkdir(os.path.dirname(cache_waveform_file)) - plt.figure(figsize=[3, 0.5]) - ax = plt.subplot(111) - audplot.waveform(signal[0, :], ax=ax) - set_plot_margins() - plt.savefig(cache_waveform_file) - plt.close() + plot_waveform_to_cache(cache_example_media, cache_waveform_file) plot_dst_dir = audeer.path( self.sphinx_src_dir, From 40b58769a5b1a6dd1ff5eceb99dcada1b07091ac Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 8 May 2024 12:40:18 +0200 Subject: [PATCH 14/14] Add extra function to plot waveform --- audbcards/core/datacard.py | 135 ++++++++++-------- audbcards/core/templates/datacard_example.j2 | 2 + .../rendered_templates/medium_db.rst | 4 +- tests/test_datacard.py | 60 ++++++-- 4 files changed, 126 insertions(+), 75 deletions(-) diff --git a/audbcards/core/datacard.py b/audbcards/core/datacard.py index 8af3dc6..ba1cf12 100644 --- a/audbcards/core/datacard.py +++ b/audbcards/core/datacard.py @@ -188,7 +188,9 @@ def player(self) -> str: the media sub-folder structure is also copied to the sphinx build dir into - ``///``. + ``///``, + and an audio element referencing this file + is added to the returned RST string. If :attr:`audbcards.Datacard.sphinx_src_dir` is not ``None``, a plot of the waveform of the media file @@ -197,73 +199,86 @@ def player(self) -> str: inside ``///``. It is also copied to the sphinx source folder into - ``///``. + ``///``, + and referenced at the beginning of the returned RST string. + + If :attr:`audbcards.Datacard.sphinx_build_dir` + and :attr:`audbcards.Datacard.sphinx_src_dir` + are ``None``, + an empty string is returned. Returns: String containing RST code to include the player """ + # Cache is organized as `///` + cache_folder = audeer.path( + self.cache_root, + self.dataset.name, + self.dataset.version, + ) - def load_media_to_cache(cache_example_media: str): + def load_media_to_cache() -> str: r"""Load media file with audb and copy to audbcards cache. - Args: - cache_example_media: full path to media file in cache + Load example media file to cache, + if not existent. + + Returns: + full path to media file in cache """ - media = audb.load_media( - self.dataset.name, + cache_example_media = audeer.path( + cache_folder, + f"{self.dataset.name}-{self.dataset.version}-player-media", self.dataset.example_media, - version=self.dataset.version, - verbose=False, - )[0] - audeer.mkdir(os.path.dirname(cache_example_media)) - shutil.copy(media, cache_example_media) - - def plot_waveform_to_cache(cache_example_media: str, cache_waveform_file: str): + ) + if not os.path.exists(cache_example_media): + media = audb.load_media( + self.dataset.name, + self.dataset.example_media, + version=self.dataset.version, + verbose=False, + )[0] + audeer.mkdir(os.path.dirname(cache_example_media)) + shutil.copy(media, cache_example_media) + return cache_example_media + + def plot_waveform_to_cache(cache_example_media: str) -> str: r"""Plot waveform of example media to cache. Args: cache_example_media: full path to media file in cache - cache_waveform_file: full path to waveform file in cache + + Returns: + full path to waveform file in cache """ - signal, sampling_rate = audiofile.read( - cache_example_media, - always_2d=True, + cache_waveform_file = audeer.path( + cache_folder, + f"{self.dataset.name}-{self.dataset.version}-player-waveform.png", ) - audeer.mkdir(os.path.dirname(cache_waveform_file)) - plt.figure(figsize=[3, 0.5]) - ax = plt.subplot(111) - audplot.waveform(signal[0, :], ax=ax) - set_plot_margins() - plt.savefig(cache_waveform_file) - plt.close() + if not os.path.exists(cache_waveform_file): + signal, sampling_rate = audiofile.read( + cache_example_media, + always_2d=True, + ) + audeer.mkdir(os.path.dirname(cache_waveform_file)) + plt.figure(figsize=[3, 0.5]) + ax = plt.subplot(111) + audplot.waveform(signal[0, :], ax=ax) + set_plot_margins() + plt.savefig(cache_waveform_file) + plt.close() + return cache_waveform_file - # Cache is organized as `///` - cache_folder = audeer.path( - self.cache_root, - self.dataset.name, - self.dataset.version, - ) - cache_example_media = audeer.path( - cache_folder, - f"{self.dataset.name}-{self.dataset.version}-player-media", - self.dataset.example_media, - ) - cache_waveform_file = audeer.path( - cache_folder, - f"{self.dataset.name}-{self.dataset.version}-player-waveform.png", - ) + # String holding the RST code to include the player + player_str = "" - # Add plot of waveform + # Add plot of waveform to Sphinx source folder (e.g. docs/) if self.sphinx_src_dir is not None: - if not os.path.exists(cache_example_media): - load_media_to_cache(cache_example_media) - - if not os.path.exists(cache_waveform_file): - plot_waveform_to_cache(cache_example_media, cache_waveform_file) - + cache_example_media = load_media_to_cache() + cache_waveform_file = plot_waveform_to_cache(cache_example_media) plot_dst_dir = audeer.path( self.sphinx_src_dir, self.path, @@ -274,12 +289,14 @@ def plot_waveform_to_cache(cache_example_media: str, cache_waveform_file: str): cache_waveform_file, os.path.join(plot_dst_dir, os.path.basename(cache_waveform_file)), ) + waveform_src = ( + f"./{self.dataset.name}/{os.path.basename(cache_waveform_file)}" + ) + player_str += f".. image:: {waveform_src}\n\n" - # Copy media file to build folder + # Copy media file to Sphinx build folder (e.g. build/) if self.sphinx_build_dir is not None: - if not os.path.exists(cache_example_media): - load_media_to_cache(cache_example_media) - + cache_example_media = load_media_to_cache() media_dst_dir = audeer.path( self.sphinx_build_dir, self.path, @@ -291,15 +308,13 @@ def plot_waveform_to_cache(cache_example_media: str, cache_waveform_file: str): os.path.join(media_dst_dir, self.dataset.example_media), ) - waveform_src = f"./{self.dataset.name}/{os.path.basename(cache_waveform_file)}" - player_src = f"./{self.dataset.name}/{self.dataset.example_media}" - player_str = ( - f".. image:: {waveform_src}\n" - "\n" - ".. raw:: html\n" - "\n" - f'

' - ) + player_src = f"./{self.dataset.name}/{self.dataset.example_media}" + player_str += ( + ".. raw:: html\n" + "\n" + f'

' + ) + return player_str def save(self, file: str = None): diff --git a/audbcards/core/templates/datacard_example.j2 b/audbcards/core/templates/datacard_example.j2 index 672b60c..ab50133 100644 --- a/audbcards/core/templates/datacard_example.j2 +++ b/audbcards/core/templates/datacard_example.j2 @@ -5,6 +5,8 @@ Example :file:`{{ example_media }}` +{% if player %} {{ player }} {% endif %} +{% endif %} diff --git a/tests/test_data/rendered_templates/medium_db.rst b/tests/test_data/rendered_templates/medium_db.rst index 0e2a0d6..dc1429b 100644 --- a/tests/test_data/rendered_templates/medium_db.rst +++ b/tests/test_data/rendered_templates/medium_db.rst @@ -1,3 +1,5 @@ +.. |medium_db-1.0.0-file-duration-distribution| image:: ./medium_db/medium_db-1.0.0-file-duration-distribution.png + .. _datasets-medium_db: medium_db @@ -16,7 +18,7 @@ channel 1 sampling rate 8000 bit depth 16 duration 0 days 00:05:02 -files 2, duration distribution: 1.0 s .. 301.0 s +files 2, duration distribution: 1.0 s |medium_db-1.0.0-file-duration-distribution| 301.0 s repository `data-local <.../data-local/medium_db>`__ published 2023-04-05 by author ============= ====================== diff --git a/tests/test_datacard.py b/tests/test_datacard.py index f5adbcb..389f580 100644 --- a/tests/test_datacard.py +++ b/tests/test_datacard.py @@ -21,11 +21,18 @@ "medium_db", ], ) -def test_datacard(db, cache, request): +def test_datacard(tmpdir, db, cache, request): """Test datacard creation from jinja2 templates.""" db = request.getfixturevalue(db) dataset = audbcards.Dataset(db.name, pytest.VERSION, cache_root=cache) datacard = audbcards.Datacard(dataset) + + # Set sphinx src and build dir + build_dir = audeer.mkdir(tmpdir, "build", "html") + src_dir = audeer.mkdir(tmpdir, "docs") + datacard.sphinx_build_dir = build_dir + datacard.sphinx_src_dir = src_dir + content = datacard._render_template() content = content.rstrip() expected_content = load_rendered_template(db.name) @@ -125,6 +132,7 @@ def test_datacard_player(tmpdir, db, cache, request): # Execute player # without specifying sphinx src and build dirs + expected_player_str = "" player_str = datacard.player() build_dir = audeer.mkdir(tmpdir, "build", "html") src_dir = audeer.mkdir(tmpdir, "docs") @@ -142,13 +150,49 @@ def test_datacard_player(tmpdir, db, cache, request): ) assert not os.path.exists(media_file) assert not os.path.exists(image_file) + assert player_str == expected_player_str - # Set sphinx src and build dir and execute again + # With sphinx source dir + expected_player_str = ( + f".. image:: ./{db.name}/{db.name}-{pytest.VERSION}-player-waveform.png\n\n" + ) + datacard.sphinx_src_dir = src_dir + player_str = datacard.player() + assert not os.path.exists(media_file) + assert os.path.exists(image_file) + assert player_str == expected_player_str + os.remove(image_file) + + # With sphinx build dir + expected_player_str = ( + ".. raw:: html\n" + "\n" + f'

" + ) + datacard.sphinx_src_dir = None + datacard.sphinx_build_dir = build_dir + player_str = datacard.player() + assert os.path.exists(media_file) + assert not os.path.exists(image_file) + assert player_str == expected_player_str + os.remove(media_file) + + # With sphinx source dir and build dir + expected_player_str = ( + f".. image:: ./{db.name}/{db.name}-{pytest.VERSION}-player-waveform.png\n" + "\n" + ".. raw:: html\n" + "\n" + f'

" + ) datacard.sphinx_build_dir = build_dir datacard.sphinx_src_dir = src_dir player_str = datacard.player() assert os.path.exists(media_file) assert os.path.exists(image_file) + assert expected_player_str == player_str # Expected waveform plot signal, sampling_rate = audiofile.read( @@ -167,18 +211,6 @@ def test_datacard_player(tmpdir, db, cache, request): waveform = open(image_file, "rb").read() assert waveform == expected_waveform - # Append audio to the expected player_str - expected_player_str = ( - f".. image:: ./{db.name}/{db.name}-{pytest.VERSION}-player-waveform.png\n" - "\n" - ".. raw:: html\n" - "\n" - f'

" - ) - # Check if the generated player_str and the expected matches - assert expected_player_str == player_str - @pytest.mark.parametrize( "dbs",