Skip to content

Commit

Permalink
capture thumbnails during europeana ingestion (#4447)
Browse files Browse the repository at this point in the history
  • Loading branch information
madewithkode authored Jun 11, 2024
1 parent a8a2e76 commit 36b693a
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 0 deletions.
9 changes: 9 additions & 0 deletions catalog/dags/providers/provider_api_scripts/europeana.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def get_record_data(self, data: dict) -> dict | None:
"license_info": self._get_license_info(data),
"filetype": self._get_filetype(item_data),
"filesize": self._get_filesize(item_data),
"thumbnail_url": self._get_thumbnail(data),
} | self._get_image_dimensions(item_data)

data_providers = set(record["meta_data"]["dataProvider"])
Expand Down Expand Up @@ -181,6 +182,14 @@ def _get_description(self, data: dict) -> str | None:

return ""

def _get_thumbnail(self, data: dict) -> str | None:
# looks like edmPreview can either be a list or string
# this was inferred from observing the difference
# between sample data in item_full.json and europeana_example.json
# so it's best to handle both cases.
if preview := data.get("edmPreview", None):
return preview[0] if isinstance(preview, list) else preview


class EuropeanaDataIngester(ProviderDataIngester):
providers = {"image": prov.EUROPEANA_DEFAULT_PROVIDER}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ def test_record_builder_get_record_data(ingester, record_builder):
"width": 381,
"filesize": 36272,
"filetype": "jpeg",
"thumbnail_url": "https://api.europeana.eu/api/v2/thumbnail-by-url.json?uri=http%3A%2F%2Fbibliotecadigital.jcyl.es%2Fi18n%2Fcatalogo_imagenes%2Fimagen_id.cmd%3FidImagen%3D102620362&type=IMAGE",
}


Expand Down Expand Up @@ -416,6 +417,7 @@ def test_process_image_data_with_sub_provider(record_builder):
),
"meta_data": expect_meta_data,
"source": "wellcome_collection",
"thumbnail_url": "https://api.europeana.eu/thumbnail/v2/url.json?uri=https%3A%2F%2Fiiif.wellcomecollection.org%2Fimage%2FV0013398.jpg%2Ffull%2F500%2C%2F0%2Fdefault.jpg&type=IMAGE",
}


Expand Down Expand Up @@ -460,3 +462,24 @@ def test_record_builder_returns_None_if_missing_required_field(
image_data[field_name] = value

assert record_builder.get_record_data(image_data) is None


# Input record -> expected thumbnail, covering both observed shapes of
# ``edmPreview`` (list and string) as well as records without the key.
_THUMBNAIL_CASES = [
    pytest.param({}, None, id="empty_object"),
    pytest.param(
        {"edmPreview": ["preview_is_list", "preview_is_list_2"]},
        "preview_is_list",
        id="preview_is_list",
    ),
    pytest.param(
        {"edmPreview": "preview_is_string"},
        "preview_is_string",
        id="preview_is_string",
    ),
    pytest.param({"no": "thumbnail"}, None, id="no_thumbnail"),
]


@pytest.mark.parametrize("data, expected", _THUMBNAIL_CASES)
def test_get_thumbnail(data, expected, record_builder):
    """Check ``_get_thumbnail`` against each input/expected pair."""
    thumbnail = record_builder._get_thumbnail(data)
    assert thumbnail == expected

0 comments on commit 36b693a

Please sign in to comment.