From 7d900cc3ed2ceb01e342fe1a3e1b245dc57e58c8 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Mon, 29 Apr 2024 13:45:05 -0700 Subject: [PATCH 1/3] Allow extracted metadata to be any type in response Extracted metadata can be any type not just a dictionary. The `dandi:files` extractor can return extracted metadata as a list for example. --- datalad_registry/blueprints/api/url_metadata/models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datalad_registry/blueprints/api/url_metadata/models.py b/datalad_registry/blueprints/api/url_metadata/models.py index f179999d..4023912a 100644 --- a/datalad_registry/blueprints/api/url_metadata/models.py +++ b/datalad_registry/blueprints/api/url_metadata/models.py @@ -1,3 +1,5 @@ +from typing import Any + from pydantic import BaseModel, Field, StrictStr @@ -27,7 +29,7 @@ class URLMetadataModel(_URLMetadataRep): dataset_version: StrictStr extractor_version: StrictStr extraction_parameter: dict - extracted_metadata: dict + extracted_metadata: Any class Config: orm_mode = True From 43013e7dc4b79b9b061999edd087d9833d6d9e11 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Mon, 29 Apr 2024 13:46:46 -0700 Subject: [PATCH 2/3] Update test To cover cases in which the extracted metadata is not a dict --- .../test_api/test_url_metadata.py | 71 +++++++++++++------ 1 file changed, 51 insertions(+), 20 deletions(-) diff --git a/datalad_registry/tests/test_blueprints/test_api/test_url_metadata.py b/datalad_registry/tests/test_blueprints/test_api/test_url_metadata.py index be9d9be2..f23c39e2 100644 --- a/datalad_registry/tests/test_blueprints/test_api/test_url_metadata.py +++ b/datalad_registry/tests/test_blueprints/test_api/test_url_metadata.py @@ -8,19 +8,31 @@ def populate_with_metadata(flask_app): from datalad_registry.models import RepoUrl, URLMetadata, db url = RepoUrl(url="https://example.com") - url_metadata = URLMetadata( - dataset_describe="abc", - dataset_version="cde", - extractor_name="complete-imagination", - 
extractor_version="0.1.0", - extraction_parameter={"a": 1, "b": 2}, - extracted_metadata={"brave": "new world", "apple": "1984"}, - url=url, - ) + + url_metadata_lst = [ + URLMetadata( + dataset_describe="abc", + dataset_version="cde", + extractor_name="complete-imagination", + extractor_version="0.1.0", + extraction_parameter={"a": 1, "b": 2}, + extracted_metadata={"brave": "new world", "apple": "1984"}, + url=url, + ), + URLMetadata( + dataset_describe="foo", + dataset_version="bar", + extractor_name="baz", + extractor_version="1.0.0", + extraction_parameter={"x": 10, "y": 20}, + extracted_metadata=["a", 1, {"year": 1984}], + url=url, + ), + ] with flask_app.app_context(): db.session.add(url) - db.session.add(url_metadata) + db.session.add_all(url_metadata_lst) db.session.commit() @@ -32,19 +44,38 @@ def test_not_found(self, flask_client, url_metadata_id): assert resp.status_code == 404 @pytest.mark.usefixtures("populate_with_metadata") - def test_found(self, flask_client): - resp = flask_client.get("/api/v2/url-metadata/1") + @pytest.mark.parametrize( + "url_metadata_id, expected_metadata", + [ + ( + 1, + URLMetadataModel( + dataset_describe="abc", + dataset_version="cde", + extractor_name="complete-imagination", + extractor_version="0.1.0", + extraction_parameter={"a": 1, "b": 2}, + extracted_metadata={"brave": "new world", "apple": "1984"}, + ), + ), + ( + 2, + URLMetadataModel( + dataset_describe="foo", + dataset_version="bar", + extractor_name="baz", + extractor_version="1.0.0", + extraction_parameter={"x": 10, "y": 20}, + extracted_metadata=["a", 1, {"year": 1984}], + ), + ), + ], + ) + def test_found(self, url_metadata_id, expected_metadata, flask_client): + resp = flask_client.get(f"/api/v2/url-metadata/{url_metadata_id}") assert resp.status_code == 200 returned_metadata = URLMetadataModel.parse_obj(resp.json) - expected_metadata = URLMetadataModel( - dataset_describe="abc", - dataset_version="cde", - extractor_name="complete-imagination", - 
extractor_version="0.1.0", - extraction_parameter={"a": 1, "b": 2}, - extracted_metadata={"brave": "new world", "apple": "1984"}, - ) assert returned_metadata == expected_metadata From b04c883ca3d57490cfdf82cf17ee8c7b5d774185 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Mon, 29 Apr 2024 14:05:57 -0700 Subject: [PATCH 3/3] Remove code redundancy in tests --- .../test_api/test_url_metadata.py | 46 ++++++------------- 1 file changed, 15 insertions(+), 31 deletions(-) diff --git a/datalad_registry/tests/test_blueprints/test_api/test_url_metadata.py b/datalad_registry/tests/test_blueprints/test_api/test_url_metadata.py index f23c39e2..9bc3a974 100644 --- a/datalad_registry/tests/test_blueprints/test_api/test_url_metadata.py +++ b/datalad_registry/tests/test_blueprints/test_api/test_url_metadata.py @@ -1,11 +1,17 @@ import pytest from datalad_registry.blueprints.api.url_metadata import URLMetadataModel +from datalad_registry.models import RepoUrl, URLMetadata, db @pytest.fixture -def populate_with_metadata(flask_app): - from datalad_registry.models import RepoUrl, URLMetadata, db +def populated_metadata(flask_app) -> list[URLMetadataModel]: + """ + Populate the database with URLMetadata instances. + + :return: The list of URLMetadataModel instances representing the URLMetadata + instances that are populated to the database. 
+ """ url = RepoUrl(url="https://example.com") @@ -35,6 +41,10 @@ def populate_with_metadata(flask_app): db.session.add_all(url_metadata_lst) db.session.commit() + return [ + URLMetadataModel.from_orm(url_metadata) for url_metadata in url_metadata_lst + ] + class TestURLMetadata: @pytest.mark.parametrize("url_metadata_id", [1, 2, 3, 60, 71, 100]) @@ -43,39 +53,13 @@ def test_not_found(self, flask_client, url_metadata_id): resp = flask_client.get(f"/api/v2/url-metadata/{url_metadata_id}") assert resp.status_code == 404 - @pytest.mark.usefixtures("populate_with_metadata") - @pytest.mark.parametrize( - "url_metadata_id, expected_metadata", - [ - ( - 1, - URLMetadataModel( - dataset_describe="abc", - dataset_version="cde", - extractor_name="complete-imagination", - extractor_version="0.1.0", - extraction_parameter={"a": 1, "b": 2}, - extracted_metadata={"brave": "new world", "apple": "1984"}, - ), - ), - ( - 2, - URLMetadataModel( - dataset_describe="foo", - dataset_version="bar", - extractor_name="baz", - extractor_version="1.0.0", - extraction_parameter={"x": 10, "y": 20}, - extracted_metadata=["a", 1, {"year": 1984}], - ), - ), - ], - ) - def test_found(self, url_metadata_id, expected_metadata, flask_client): + @pytest.mark.parametrize("url_metadata_id", [1, 2]) + def test_found(self, url_metadata_id, populated_metadata, flask_client): resp = flask_client.get(f"/api/v2/url-metadata/{url_metadata_id}") assert resp.status_code == 200 returned_metadata = URLMetadataModel.parse_obj(resp.json) + expected_metadata = populated_metadata[url_metadata_id - 1] assert returned_metadata == expected_metadata