From 48d3c1140b6b2dba9dae4f99c1edae8a7bbb7ebc Mon Sep 17 00:00:00 2001 From: Douglas Cerna Date: Thu, 29 Aug 2024 14:16:26 -0600 Subject: [PATCH 1/8] Prefetch related models in FPR views --- pyproject.toml | 1 + src/dashboard/src/fpr/forms.py | 22 +- src/dashboard/src/fpr/views.py | 22 +- tests/dashboard/fpr/test_views.py | 326 +++++++++++++++++++----------- 4 files changed, 244 insertions(+), 127 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 34d52fe0e..86d11bc48 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,6 +64,7 @@ module = [ "src.MCPClient.lib.clientScripts.policy_check", "src.MCPClient.lib.clientScripts.transcribe_file", "src.MCPClient.lib.clientScripts.validate_file", + "tests.dashboard.fpr.test_views", "tests.MCPClient.conftest", "tests.MCPClient.test_characterize_file", "tests.MCPClient.test_has_packages", diff --git a/src/dashboard/src/fpr/forms.py b/src/dashboard/src/fpr/forms.py index 39ee6a0f1..e2e23a2ba 100644 --- a/src/dashboard/src/fpr/forms.py +++ b/src/dashboard/src/fpr/forms.py @@ -79,8 +79,14 @@ class IDRuleForm(forms.ModelForm): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # Limit to only enabled formats/commands - self.fields["format"].queryset = fprmodels.FormatVersion.active.all() - self.fields["command"].queryset = fprmodels.IDCommand.active.all() + self.fields[ + "format" + ].queryset = fprmodels.FormatVersion.active.all().prefetch_related( + "format__group" + ) + self.fields[ + "command" + ].queryset = fprmodels.IDCommand.active.all().prefetch_related("tool") class Meta: model = fprmodels.IDRule @@ -110,7 +116,11 @@ def __init__(self, *args, **kwargs): self.fields["command"].initial = self.instance.command.uuid # Show only active format versions in the format dropdown - self.fields["format"].queryset = fprmodels.FormatVersion.active.all() + self.fields[ + "format" + ].queryset = fprmodels.FormatVersion.active.all().prefetch_related( + "format__group" + ) def clean(self): cleaned_data = super().clean() @@ -168,6 +178,12 @@ class FPCommandForm(forms.ModelForm): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + self.fields[ + "output_format" + ].queryset = fprmodels.FormatVersion.active.all().prefetch_related( + "format__group" + ) + verification_commands = fprmodels.FPCommand.active.filter( command_usage="verification" ) diff --git a/src/dashboard/src/fpr/views.py b/src/dashboard/src/fpr/views.py index 378b9dd58..7b15734ba 100644 --- a/src/dashboard/src/fpr/views.py +++ b/src/dashboard/src/fpr/views.py @@ -317,7 +317,9 @@ def idrule_list(request): "replaces_id" ) ] - idrules = fprmodels.IDRule.objects.exclude(uuid__in=replacing_rules) + idrules = fprmodels.IDRule.objects.exclude( + uuid__in=replacing_rules + ).prefetch_related("format__format__group", "command") return render(request, "fpr/idrule/list.html", context(locals())) @@ -391,7 +393,9 @@ def idcommand_list(request): "replaces_id" ) ] - idcommands = fprmodels.IDCommand.objects.exclude(uuid__in=replacing_commands) + idcommands = fprmodels.IDCommand.objects.exclude( + uuid__in=replacing_commands + ).prefetch_related("tool") return render(request, "fpr/idcommand/list.html", context(locals())) @@ -493,7 +497,11 @@ def fprule_list(request, usage=None): else: opts = {} # Display disabled rules as long as they aren't replaced by another rule - fprules = fprmodels.FPRule.objects.filter(**opts).exclude(uuid__in=replacing_rules) + fprules = ( + fprmodels.FPRule.objects.filter(**opts) + .exclude(uuid__in=replacing_rules) + 
.prefetch_related("format__format__group", "command") + ) return render(request, "fpr/fprule/list.html", context(locals())) @@ -621,8 +629,10 @@ def fpcommand_list(request, usage=None): "replaces_id" ) ] - fpcommands = fprmodels.FPCommand.objects.filter(**opts).exclude( - uuid__in=replacing_commands + fpcommands = ( + fprmodels.FPCommand.objects.filter(**opts) + .exclude(uuid__in=replacing_commands) + .prefetch_related("tool") ) return render(request, "fpr/fpcommand/list.html", context(locals())) @@ -641,7 +651,7 @@ def fpcommand_edit(request, uuid=None): title = _("Replace command %(name)s") % {"name": fpcommand.description} else: fpcommand = None - title = _("Create format version") + title = _("Create format policy command") if request.method == "POST": form = fprforms.FPCommandForm(request.POST, instance=fpcommand) if form.is_valid(): diff --git a/tests/dashboard/fpr/test_views.py b/tests/dashboard/fpr/test_views.py index 3f1882207..6b246d21f 100644 --- a/tests/dashboard/fpr/test_views.py +++ b/tests/dashboard/fpr/test_views.py @@ -1,122 +1,129 @@ +import uuid + import pytest from components import helpers -from django.contrib.auth.models import User -from django.test import TestCase +from django.test import Client from django.urls import reverse -from fpr.models import Format -from fpr.models import FormatGroup -from fpr.models import FPCommand -from fpr.models import FPTool -from fpr.models import IDTool - - -class TestViews(TestCase): - def setUp(self): - user = User.objects.create_superuser("demo", "demo@example.com", "demo") - self.client.login(username=user.username, password="demo") - helpers.set_setting("dashboard_uuid", "test-uuid") - - def test_idcommand_create(self): - url = reverse("fpr:idcommand_create") - tool = IDTool.objects.create( - uuid="37f3bd7c-bb24-4899-b7c4-785ff1c764ac", - description="Foobar", - version="v1.2.3", - ) - - resp = self.client.get(url) - self.assertEqual(resp.context["form"].initial["tool"], None) - - resp = self.client.get(url, {"parent": "c80458d9-2b62-40f4-b61c-936bfb72901d"}) - self.assertEqual(resp.context["form"].initial["tool"], None) - - resp = self.client.get(url, {"parent": tool.uuid}) - self.assertEqual(resp.context["form"].initial["tool"], tool) - - def test_fpcommand_create(self): - url = reverse("fpr:fpcommand_create") - tool = FPTool.objects.create( - uuid="37f3bd7c-bb24-4899-b7c4-785ff1c764ac", - description="Foobar", - version="v1.2.3", - ) - - resp = self.client.get(url) - self.assertEqual(resp.context["form"].initial["tool"], None) - - resp = self.client.get(url, {"parent": "d993bdcf-a944-4df8-b960-1b20c14ffe68"}) - self.assertEqual(resp.context["form"].initial["tool"], None) - - resp = self.client.get(url, {"parent": tool.uuid}) - self.assertEqual(resp.context["form"].initial["tool"], tool) - - def test_fpcommand_edit(self): - fpcommand_id = "41112047-7ddf-4bf0-9156-39fe96b32d53" - url = reverse("fpr:fpcommand_edit", args=[fpcommand_id]) - - fpcommand = FPCommand.active.get(uuid=fpcommand_id) - self.assertEqual(fpcommand.description, "Copying file to access directory") - - form_data = { - "verification_command": ["ef3ea000-0c3c-4cae-adc2-aa2a6ccbffce"], - "description": ["new description"], - "tool": ["0efc346e-6373-4799-819d-17cc0f21f827"], - "event_detail_command": [""], - "output_location": [ - "%outputDirectory%%prefix%%fileName%%postfix%%fileExtensionWithDot%" - ], - "command_usage": ["normalization"], - "command": [ - 'cp -R "%inputFile%" "%outputDirectory%%prefix%%fileName%%postfix%%fileExtensionWithDot%"' - ], - 
"csrfmiddlewaretoken": [ - "k5UUufiJuSOLNOGJYlU2ODow5iKPhOuLc9Q0EmUoIXsQLZ7r5Ede7Pf0pSQEm0lP" - ], - "output_format": ["0ab4cd40-90e7-4d75-b294-498177b3897d"], - "script_type": ["command"], - } - resp = self.client.post(url, follow=True, data=form_data) - self.assertEqual(resp.status_code, 200) - - # Our fpcommand is now expected to be disabled. - fpcommand = FPCommand.objects.get(uuid=fpcommand_id) - self.assertEqual(fpcommand.enabled, False) - - # And replaced by a new fpcommand. - fpcommand = FPCommand.active.get(replaces_id=fpcommand_id) - self.assertEqual(fpcommand.description, "new description") - - def test_fpcommand_delete(self): - fpcommand_id = "0fd7935a-ed0d-4f67-aa25-1b44684f6aca" - url = reverse("fpr:fpcommand_delete", args=[fpcommand_id]) - - self.assertEqual(FPCommand.active.filter(uuid=fpcommand_id).exists(), True) - - resp = self.client.post(url, follow=True, data={"disable": True}) - - self.assertEqual(resp.status_code, 200) - self.assertEqual(FPCommand.active.filter(uuid=fpcommand_id).exists(), False) - - def test_fpcommand_revisions(self): - fpcommand_id = "cb335c49-e6ce-445f-a774-494a6f2300c6" - url = reverse("fpr:revision_list", args=["fpcommand", fpcommand_id]) - fpcommand = FPCommand.active.get(uuid=fpcommand_id) - - resp = self.client.get(url, follow=True) - - # Assert that the revision list shows multiple instances. - self.assertContains(resp, fpcommand.uuid) - self.assertContains(resp, fpcommand.replaces_id) +from fpr import models + + +@pytest.fixture +def dashboard_uuid() -> None: + helpers.set_setting("dashboard_uuid", str(uuid.uuid4())) + + +@pytest.mark.django_db +def test_idcommand_create(dashboard_uuid: None, admin_client: Client) -> None: + url = reverse("fpr:idcommand_create") + tool = models.IDTool.objects.create( + uuid="37f3bd7c-bb24-4899-b7c4-785ff1c764ac", + description="Foobar", + version="v1.2.3", + ) + + resp = admin_client.get(url) + assert resp.context["form"].initial["tool"] is None + + resp = admin_client.get(url, {"parent": str(uuid.uuid4())}) + assert resp.context["form"].initial["tool"] is None + + resp = admin_client.get(url, {"parent": tool.uuid}) + assert resp.context["form"].initial["tool"] == tool + + +@pytest.mark.django_db +def test_fpcommand_create(dashboard_uuid: None, admin_client: Client) -> None: + url = reverse("fpr:fpcommand_create") + tool = models.FPTool.objects.create( + uuid="37f3bd7c-bb24-4899-b7c4-785ff1c764ac", + description="Foobar", + version="v1.2.3", + ) + + resp = admin_client.get(url) + assert resp.context["form"].initial["tool"] is None + + resp = admin_client.get(url, {"parent": str(uuid.uuid4())}) + assert resp.context["form"].initial["tool"] is None + + resp = admin_client.get(url, {"parent": tool.uuid}) + assert resp.context["form"].initial["tool"] == tool + + +@pytest.mark.django_db +def test_fpcommand_edit(dashboard_uuid: None, admin_client: Client) -> None: + fpcommand_id = "41112047-7ddf-4bf0-9156-39fe96b32d53" + url = reverse("fpr:fpcommand_edit", args=[fpcommand_id]) + + fpcommand = models.FPCommand.active.get(uuid=fpcommand_id) + assert fpcommand.description == "Copying file to access directory" + + form_data = { + "verification_command": ["ef3ea000-0c3c-4cae-adc2-aa2a6ccbffce"], + "description": ["new description"], + "tool": ["0efc346e-6373-4799-819d-17cc0f21f827"], + "event_detail_command": [""], + "output_location": [ + "%outputDirectory%%prefix%%fileName%%postfix%%fileExtensionWithDot%" + ], + "command_usage": ["normalization"], + "command": [ + 'cp -R "%inputFile%" 
"%outputDirectory%%prefix%%fileName%%postfix%%fileExtensionWithDot%"' + ], + "csrfmiddlewaretoken": [ + "k5UUufiJuSOLNOGJYlU2ODow5iKPhOuLc9Q0EmUoIXsQLZ7r5Ede7Pf0pSQEm0lP" + ], + "output_format": ["0ab4cd40-90e7-4d75-b294-498177b3897d"], + "script_type": ["command"], + } + resp = admin_client.post(url, follow=True, data=form_data) + assert resp.status_code == 200 + + # Our fpcommand is now expected to be disabled. + fpcommand = models.FPCommand.objects.get(uuid=fpcommand_id) + assert not fpcommand.enabled + + # And replaced by a new fpcommand. + fpcommand = models.FPCommand.active.get(replaces_id=fpcommand_id) + assert fpcommand.description == "new description" + + +@pytest.mark.django_db +def test_fpcommand_delete(dashboard_uuid: None, admin_client: Client) -> None: + fpcommand_id = "0fd7935a-ed0d-4f67-aa25-1b44684f6aca" + url = reverse("fpr:fpcommand_delete", args=[fpcommand_id]) + + assert models.FPCommand.active.filter(uuid=fpcommand_id).exists() + + resp = admin_client.post(url, follow=True, data={"disable": True}) + + assert resp.status_code == 200 + assert not models.FPCommand.active.filter(uuid=fpcommand_id).exists() + + +@pytest.mark.django_db +def test_fpcommand_revisions(dashboard_uuid: None, admin_client: Client) -> None: + fpcommand_id = "cb335c49-e6ce-445f-a774-494a6f2300c6" + url = reverse("fpr:revision_list", args=["fpcommand", fpcommand_id]) + fpcommand = models.FPCommand.active.get(uuid=fpcommand_id) + + resp = admin_client.get(url, follow=True) + + # Assert that the revision list shows multiple instances. + content = resp.content.decode() + assert str(fpcommand.uuid) in content + assert str(fpcommand.replaces_id) in content @pytest.mark.django_db -def test_format_create_creates_format(admin_client): - helpers.set_setting("dashboard_uuid", "test-uuid") +def test_format_create_creates_format( + dashboard_uuid: None, admin_client: Client +) -> None: # Add a new format to the Unknown group. - unknown_group = FormatGroup.objects.get(description="Unknown") + unknown_group = models.FormatGroup.objects.get(description="Unknown") format_description = "My test format" - assert Format.objects.filter(description=format_description).count() == 0 + assert models.Format.objects.filter(description=format_description).count() == 0 response = admin_client.post( reverse("fpr:format_create"), @@ -129,7 +136,7 @@ def test_format_create_creates_format(admin_client): assert "Saved" in content assert "Format My test format" in content assert ( - Format.objects.filter( + models.Format.objects.filter( description=format_description, group=unknown_group ).count() == 1 @@ -137,20 +144,19 @@ def test_format_create_creates_format(admin_client): @pytest.mark.django_db -def test_format_edit_updates_format(admin_client): - helpers.set_setting("dashboard_uuid", "test-uuid") +def test_format_edit_updates_format(dashboard_uuid: None, admin_client: Client) -> None: # Get details of the Matroska format from the Video group. - video_group = FormatGroup.objects.get(description="Video") - format = Format.objects.get(description="Matroska", group=video_group) + video_group = models.FormatGroup.objects.get(description="Video") + format = models.Format.objects.get(description="Matroska", group=video_group) format_uuid = format.uuid format_slug = format.slug # Update the group and description of the Matroska format. 
- unknown_group = FormatGroup.objects.get(description="Unknown") + unknown_group = models.FormatGroup.objects.get(description="Unknown") new_format_description = "My matroska format" assert ( - Format.objects.filter( + models.Format.objects.filter( description=new_format_description, group=unknown_group ).count() == 0 @@ -167,7 +173,7 @@ def test_format_edit_updates_format(admin_client): assert "Saved" in content assert "Format My matroska format" in content assert ( - Format.objects.filter( + models.Format.objects.filter( uuid=format_uuid, slug=format_slug, description=new_format_description, @@ -175,3 +181,87 @@ def test_format_edit_updates_format(admin_client): ).count() == 1 ) + + +@pytest.mark.django_db +def test_idrule_create(dashboard_uuid: None, admin_client: Client) -> None: + url = reverse("fpr:idrule_create") + + resp = admin_client.get(url) + + assert resp.context["form"].initial == {} + assert "Create identification rule" in resp.content.decode() + + format_version = models.FormatVersion.objects.create( + format=models.Format.objects.create( + group=models.FormatGroup.objects.create(description="Group"), + description="Format", + ), + description="Format version", + ) + command = models.IDCommand.objects.create( + tool=models.IDTool.objects.create(description="Tool") + ) + command_output = ".ppt" + + resp = admin_client.post( + url, + { + "format": format_version.uuid, + "command": command.uuid, + "command_output": command_output, + }, + follow=True, + ) + + assert "Saved." in resp.content.decode() + assert ( + models.IDRule.objects.filter( + format=format_version.uuid, + command=command.uuid, + command_output=command_output, + ).count() + == 1 + ) + + +@pytest.mark.django_db +def test_fprule_create(dashboard_uuid: None, admin_client: Client) -> None: + url = reverse("fpr:fprule_create") + + resp = admin_client.get(url) + + assert resp.context["form"].initial == {} + assert "Create format policy rule" in resp.content.decode() + + purpose = models.FPRule.CHARACTERIZATION + format_version = models.FormatVersion.objects.create( + format=models.Format.objects.create( + group=models.FormatGroup.objects.create(description="Group"), + description="Format", + ), + description="Format version", + ) + command = models.FPCommand.objects.create( + tool=models.FPTool.objects.create(description="Tool") + ) + + resp = admin_client.post( + url, + { + "f-purpose": purpose, + "f-format": format_version.uuid, + "f-command": command.uuid, + }, + follow=True, + ) + + assert "Saved." 
in resp.content.decode() + assert ( + models.FPRule.objects.filter( + purpose=purpose, + format=format_version.uuid, + command=command.uuid, + ).count() + == 1 + ) From 09418abbcb3c4144eb8abbea914c31370ad33df6 Mon Sep 17 00:00:00 2001 From: "Douglas Cerna (Soy Douglas)" Date: Thu, 5 Sep 2024 15:34:56 +0000 Subject: [PATCH 2/8] Upgrade Python requirements --- requirements-dev.txt | 12 ++++++------ requirements.txt | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 2e008dd19..3e3453b17 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -27,11 +27,11 @@ build==1.2.1 # via pip-tools cachetools==5.5.0 # via tox -certifi==2024.7.4 +certifi==2024.8.30 # via # -r requirements.txt # requests -cffi==1.17.0 +cffi==1.17.1 # via # -r requirements.txt # cryptography @@ -51,7 +51,7 @@ coverage[toml]==7.6.1 # via # -r requirements-dev.in # pytest-cov -cryptography==43.0.0 +cryptography==43.0.1 # via # -r requirements.txt # josepy @@ -59,7 +59,7 @@ cryptography==43.0.0 # pyopenssl distlib==0.3.8 # via virtualenv -django==4.2.15 +django==4.2.16 # via # -r requirements.txt # django-auth-ldap @@ -209,7 +209,7 @@ pytest==8.3.2 # pytest-randomly pytest-cov==5.0.0 # via -r requirements-dev.in -pytest-django==4.8.0 +pytest-django==4.9.0 # via -r requirements-dev.in pytest-mock==3.14.0 # via -r requirements-dev.in @@ -305,7 +305,7 @@ zope-interface==7.0.3 # The following packages are considered to be unsafe in a requirements file: pip==24.2 # via pip-tools -setuptools==74.0.0 +setuptools==74.1.2 # via # -r requirements.txt # pip-tools diff --git a/requirements.txt b/requirements.txt index d7b5c8437..dbf479dc6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,20 +20,20 @@ bagit @ git+https://github.com/artefactual-labs/bagit-python.git@4b8fde73b4e6314 # via -r requirements.in brotli==1.1.0 # via -r requirements.in -certifi==2024.7.4 +certifi==2024.8.30 # via requests -cffi==1.17.0 +cffi==1.17.1 # via cryptography charset-normalizer==3.3.2 # via requests clamd==1.0.2 # via -r requirements.in -cryptography==43.0.0 +cryptography==43.0.1 # via # josepy # mozilla-django-oidc # pyopenssl -django==4.2.15 +django==4.2.16 # via # -r requirements.in # django-auth-ldap @@ -172,7 +172,7 @@ zope-interface==7.0.3 # via gevent # The following packages are considered to be unsafe in a requirements file: -setuptools==74.0.0 +setuptools==74.1.2 # via # zope-event # zope-interface From f99179666441c0e452fccf34e10275278d9c1b63 Mon Sep 17 00:00:00 2001 From: "Douglas Cerna (Soy Douglas)" Date: Tue, 10 Sep 2024 20:53:18 +0000 Subject: [PATCH 3/8] Add manual normalization AMAUAT to test matrix --- .github/workflows/acceptance-test.yml | 1 + hack/submodules/archivematica-acceptance-tests | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/acceptance-test.yml b/.github/workflows/acceptance-test.yml index cf6e886b9..255217160 100644 --- a/.github/workflows/acceptance-test.yml +++ b/.github/workflows/acceptance-test.yml @@ -25,6 +25,7 @@ jobs: - "icc" - "ipc preservation" - "ipc access" + - "man-norm" - "metadata-xml" - "tcc" - "tpc" diff --git a/hack/submodules/archivematica-acceptance-tests b/hack/submodules/archivematica-acceptance-tests index d44098ee2..e5887b48d 160000 --- a/hack/submodules/archivematica-acceptance-tests +++ b/hack/submodules/archivematica-acceptance-tests @@ -1 +1 @@ -Subproject commit d44098ee288fff0a4adb5c35b9f094ba04fa9cda +Subproject commit e5887b48d695b2a8050c828d167807699ad8f457 
From 0212db1978af3eb390ebdec205598ce4b2cc4fa9 Mon Sep 17 00:00:00 2001 From: Douglas Cerna Date: Wed, 11 Sep 2024 07:41:22 -0600 Subject: [PATCH 4/8] Remove FITS This removes: * the fits service and its package dependencies from the Compose environment * the FITS_v0.0 client script from the MCPClient and updates the MCPServer workflow to use the identifyFileFormat_v0.0 and characterizeFile_v0.0 scripts for manually normalized preservation files * the FPR model instances (FPTool, FPCommand and FPRule) related to FITS with a database data migration in the fpr application of the Dashboard --- hack/Dockerfile | 2 - hack/README.md | 2 - hack/docker-compose.yml | 11 -- src/MCPClient/lib/archivematicaClientModules | 1 - .../lib/clientScripts/characterize_file.py | 2 +- src/MCPClient/lib/clientScripts/fits.py | 124 ------------------ ...ization_create_metadata_and_restructure.py | 7 +- ..._normalization_move_access_files_to_dip.py | 2 +- src/MCPServer/lib/assets/workflow.json | 89 ++++++------- .../src/fpr/migrations/0044_remove_fits.py | 12 ++ 10 files changed, 63 insertions(+), 189 deletions(-) delete mode 100755 src/MCPClient/lib/clientScripts/fits.py create mode 100644 src/dashboard/src/fpr/migrations/0044_remove_fits.py diff --git a/hack/Dockerfile b/hack/Dockerfile index 3625f501e..e02c1cef9 100644 --- a/hack/Dockerfile +++ b/hack/Dockerfile @@ -168,7 +168,6 @@ RUN set -ex \ clamav \ coreutils \ ffmpeg \ - fits \ g++ \ gcc \ gearman \ @@ -190,7 +189,6 @@ RUN set -ex \ md5deep \ mediaconch \ mediainfo \ - nailgun \ nfs-common \ openjdk-8-jre-headless \ p7zip-full \ diff --git a/hack/README.md b/hack/README.md index 72d2d3b1e..1c0063b10 100644 --- a/hack/README.md +++ b/hack/README.md @@ -64,7 +64,6 @@ am-archivematica-mcp-server-1 39.43MiB / 7.763GiB am-archivematica-storage-service-1 83.96MiB / 7.763GiB am-nginx-1 2.715MiB / 7.763GiB am-elasticsearch-1 900.2MiB / 7.763GiB -am-fits-1 71.09MiB / 7.763GiB am-gearmand-1 3.395MiB / 7.763GiB am-mysql-1 551.9MiB / 7.763GiB am-clamavd-1 570MiB / 7.763GiB @@ -312,7 +311,6 @@ echo workers | socat - tcp:127.0.0.1:62004,shut-none | grep "_v0.0" | awk '{prin | mysql | `tcp/3306` | `tcp/62001` | | elasticsearch | `tcp/9200` | `tcp/62002` | | gearman | `tcp/4730` | `tcp/62004` | -| fits | `tcp/2113` | `tcp/62005` | | clamavd | `tcp/3310` | `tcp/62006` | | nginx » archivematica-dashboard | `tcp/80` | `tcp/62080` | | nginx » archivematica-storage-service | `tcp/8000` | `tcp/62081` | diff --git a/hack/docker-compose.yml b/hack/docker-compose.yml index 9e4be90de..b123bb4d3 100644 --- a/hack/docker-compose.yml +++ b/hack/docker-compose.yml @@ -76,14 +76,6 @@ services: ports: - "127.0.0.1:62004:4730" - fits: - image: "artefactual/fits-ngserver:0.8.4" - user: ${USER_ID:-1000} - ports: - - "127.0.0.1:62005:2113" - volumes: - - "archivematica_pipeline_data:/var/archivematica/sharedDirectory:rw" # Read and write needed! 
- clamavd: image: "artefactual/clamav:latest" environment: @@ -146,8 +138,6 @@ services: environment: DJANGO_SECRET_KEY: "12345" DJANGO_SETTINGS_MODULE: "settings.common" - NAILGUN_SERVER: "fits" - NAILGUN_PORT: "2113" ARCHIVEMATICA_MCPCLIENT_CLIENT_USER: "archivematica" ARCHIVEMATICA_MCPCLIENT_CLIENT_PASSWORD: "demo" ARCHIVEMATICA_MCPCLIENT_CLIENT_HOST: "mysql" @@ -169,7 +159,6 @@ services: - "../:/src" - "archivematica_pipeline_data:/var/archivematica/sharedDirectory:rw" links: - - "fits" - "clamavd" - "mysql" - "gearmand" diff --git a/src/MCPClient/lib/archivematicaClientModules b/src/MCPClient/lib/archivematicaClientModules index 28f259eab..4dff8ceab 100644 --- a/src/MCPClient/lib/archivematicaClientModules +++ b/src/MCPClient/lib/archivematicaClientModules @@ -26,7 +26,6 @@ removeunneededfiles_v0.0 = remove_unneeded_files archivematicaclamscan_v0.0 = archivematica_clamscan createevent_v0.0 = create_event examinecontents_v0.0 = examine_contents -fits_v0.0 = fits identifydspacefiles_v0.0 = identify_dspace_files identifydspacemetsfiles_v0.0 = identify_dspace_mets_files identifyfileformat_v0.0 = identify_file_format diff --git a/src/MCPClient/lib/clientScripts/characterize_file.py b/src/MCPClient/lib/clientScripts/characterize_file.py index f9c0cc239..dd875d5a2 100755 --- a/src/MCPClient/lib/clientScripts/characterize_file.py +++ b/src/MCPClient/lib/clientScripts/characterize_file.py @@ -5,7 +5,7 @@ # b) Prints the tool's stdout, for tools which do not output XML # # If a tool has no defined characterization commands, then the default -# will be run instead (currently FITS). +# will be run instead. import argparse import dataclasses import multiprocessing diff --git a/src/MCPClient/lib/clientScripts/fits.py b/src/MCPClient/lib/clientScripts/fits.py deleted file mode 100755 index 6012d0e96..000000000 --- a/src/MCPClient/lib/clientScripts/fits.py +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/env python -# This file is part of Archivematica. -# -# Copyright 2010-2013 Artefactual Systems Inc. -# -# Archivematica is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Archivematica is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Archivematica. If not, see . -import os -import tempfile - -import django -import lxml.etree as etree -from archivematicaFunctions import getTagged -from custom_handlers import get_script_logger -from databaseFunctions import insertIntoFPCommandOutput -from django.db import transaction -from executeOrRunSubProcess import executeOrRun - -# archivematicaCommon - -django.setup() -# dashboard -from main.models import FPCommandOutput - -logger = get_script_logger("archivematica.mcp.client.FITS") - -FITSNS = "{http://hul.harvard.edu/ois/xml/ns/fits/fits_output}" - - -def exclude_jhove_properties(fits): - """ - Exclude from "/fits/toolOutput/tool[name=Jhove]/repInfo" - because that field contains unnecessary excess data and the key data are - covered by output from other FITS tools. 
- """ - format_validation = None - tools = getTagged(getTagged(fits, FITSNS + "toolOutput")[0], FITSNS + "tool") - for tool in tools: - if tool.get("name") == "Jhove": - format_validation = tool - break - if format_validation is None: - return fits - repInfo = getTagged(format_validation, "repInfo")[0] - properties = getTagged(repInfo, "properties") - if len(properties): - repInfo.remove(properties[0]) - return fits - - -def main(target, xml_file, date, event_uuid, file_uuid, file_grpuse): - """ - Note: xml_file, date and event_uuid are not being used. - """ - if file_grpuse in ("DSPACEMETS", "maildirFile"): - logger.error("File's fileGrpUse in exclusion list, skipping") - return 0 - - if not FPCommandOutput.objects.filter(file=file_uuid).exists(): - logger.error("Warning: Fits has already run on this file. Not running again.") - return 0 - - _, temp_file = tempfile.mkstemp() - args = ["fits.sh", "-i", target, "-o", temp_file] - try: - logger.info("Executing %s", args) - retcode, stdout, stderr = executeOrRun( - "command", args, printing=False, capture_output=True - ) - - if retcode != 0: - logger.error( - "fits.sh exited with status code %s, %s, %s", retcode, stdout, stderr - ) - return retcode - - try: - tree = etree.parse(temp_file) - except Exception: - logger.exception("Failed to read Fits's XML.") - return 2 - - fits = tree.getroot() - fits = exclude_jhove_properties(fits) - - # NOTE: This is hardcoded for now because FPCommandOutput references FPRule for future development, - # when characterization will become user-configurable and be decoupled from FITS specifically. - # Thus a stub rule must exist for FITS; this will be replaced with a real rule in the future. - logger.info("Storing output of file characterization...") - insertIntoFPCommandOutput( - file_uuid, - etree.tostring(fits, pretty_print=False, encoding="utf8"), - "3a19de70-0e42-4145-976b-3a248d43b462", - ) - - except (OSError, ValueError): - logger.exception("Execution failed") - return 1 - - finally: - # We are responsible for removing the temporary file and we do it here - # to ensure that it's going to happen whatever occurs inside our try - # block. 
- os.remove(temp_file) - - return 0 - - -def call(jobs): - with transaction.atomic(): - for job in jobs: - with job.JobContext(logger=logger): - args = job.args[1:] - job.set_status(main(*args)) diff --git a/src/MCPClient/lib/clientScripts/manual_normalization_create_metadata_and_restructure.py b/src/MCPClient/lib/clientScripts/manual_normalization_create_metadata_and_restructure.py index bbfd3e6d2..c0d711820 100755 --- a/src/MCPClient/lib/clientScripts/manual_normalization_create_metadata_and_restructure.py +++ b/src/MCPClient/lib/clientScripts/manual_normalization_create_metadata_and_restructure.py @@ -120,7 +120,12 @@ def main(job): # We found the original file somewhere above job.print_output( "Matched original file %s (%s) to preservation file %s (%s)" - % (original_file.currentlocation, original_file.uuid, filePath, fileUUID) + % ( + original_file.currentlocation.decode(), + original_file.uuid, + filePath, + fileUUID, + ) ) # Generate the new preservation path: path/to/original/filename-uuid.ext basename = os.path.basename(filePath) diff --git a/src/MCPClient/lib/clientScripts/manual_normalization_move_access_files_to_dip.py b/src/MCPClient/lib/clientScripts/manual_normalization_move_access_files_to_dip.py index cbecf6fd7..2b0345b88 100755 --- a/src/MCPClient/lib/clientScripts/manual_normalization_move_access_files_to_dip.py +++ b/src/MCPClient/lib/clientScripts/manual_normalization_move_access_files_to_dip.py @@ -106,7 +106,7 @@ def main(job): } f = File.objects.get(**kwargs) else: - if isinstance(e, File.DoesNotExist, ValidationError): + if isinstance(e, (File.DoesNotExist, ValidationError)): job.print_error( "No matching file for: ", opts.filePath.replace(opts.SIPDirectory, "%SIPDirectory%", 1), diff --git a/src/MCPServer/lib/assets/workflow.json b/src/MCPServer/lib/assets/workflow.json index 9a917cea5..a1d72cf02 100644 --- a/src/MCPServer/lib/assets/workflow.json +++ b/src/MCPServer/lib/assets/workflow.json @@ -1927,15 +1927,15 @@ "config": { "@manager": "linkTaskManagerFiles", "@model": "StandardTaskConfig", - "arguments": "\"%relativeLocation%\" \"%SIPLogsDirectory%fileMeta/%fileUUID%.xml\" \"%date%\" \"%taskUUID%\" \"%fileUUID%\" \"%fileGrpUse%\"", - "execute": "FITS_v0.0", + "arguments": "\"%fileUUID%\" \"%SIPUUID%\"", + "execute": "characterizeFile_v0.0", "filter_subdir": "objects/manualNormalization/preservation" }, "description": { - "en": "Run FITS on manual normalized preservation files", - "no": "Kjør FITS på manuelt normaliserte bevaringsfiler", - "pt_BR": "Executar o FITS em arquivos de preservação normalizados manuaimente", - "sv": "Kör FITS på manuellt normaliserade bevarandefiler" + "en": "Characterize and extract metadata on manual normalized preservation files", + "no": "Karakteriser og hent ut metadata på manuelt normaliserte bevaringsfiler", + "pt_BR": "Caracterizar e extrair metadados em arquivos de preservação normalizados manuaimente", + "sv": "Karaktärisera och extrahera metadata på manuellt normaliserade bevarandefiler" }, "exit_codes": { "0": { @@ -1948,7 +1948,7 @@ "group": { "en": "Process manually normalized files", "es": "Procesar manualmente ficheros normalizados", - "no": "Prosesser normaliserte filer manuelt", + "no": "Prosesser manuelt normaliserte filer", "pt_BR": "Processar arquivos normalizados manualmente", "sv": "Bearbeta manuellt normaliserade filer" } @@ -2934,11 +2934,11 @@ "exit_codes": { "0": { "job_status": "Completed successfully", - "link_id": "bd382151-afd0-41bf-bb7a-b39aef728a32" + "link_id": "1b1a4565-b501-407b-b40f-2f20889423f1" } 
}, "fallback_job_status": "Failed", - "fallback_link_id": "bd382151-afd0-41bf-bb7a-b39aef728a32", + "fallback_link_id": "1b1a4565-b501-407b-b40f-2f20889423f1", "group": { "en": "Extract packages", "es": "Extraer paquetes", @@ -7667,7 +7667,7 @@ } }, "fallback_job_status": "Failed", - "fallback_link_id": "bd382151-afd0-41bf-bb7a-b39aef728a32", + "fallback_link_id": "1b1a4565-b501-407b-b40f-2f20889423f1", "group": { "en": "Extract packages", "es": "Extraer paquetes", @@ -9044,41 +9044,6 @@ "sv": "Normalisera" } }, - "bd382151-afd0-41bf-bb7a-b39aef728a32": { - "config": { - "@manager": "linkTaskManagerFiles", - "@model": "StandardTaskConfig", - "arguments": "\"%relativeLocation%\" \"%SIPLogsDirectory%fileMeta/%fileUUID%.xml\" \"%date%\" \"%taskUUID%\" \"%fileUUID%\" \"%fileGrpUse%\"", - "execute": "FITS_v0.0", - "filter_subdir": "objects/attachments" - }, - "description": { - "en": "Characterize and extract metadata for attachments", - "es": "Caracterizar y extraer los metadatos de los adjuntos", - "fr": "Caractériser et extraire les métadonnées pour mettre en pièces jointes", - "ja": "添付ファイルのメタデータの特徴付けと抽出", - "no": "Karakteriser og hent ut metadata fra vedlegg", - "pt_BR": "Caracterizar e extrair metadados para anexos", - "sv": "Karaktärisera och extrahera metadata för bilagor" - }, - "exit_codes": { - "0": { - "job_status": "Completed successfully", - "link_id": "1b1a4565-b501-407b-b40f-2f20889423f1" - } - }, - "fallback_job_status": "Failed", - "fallback_link_id": "61c316a6-0a50-4f65-8767-1f44b1eeb6dd", - "group": { - "en": "Characterize and extract metadata", - "es": "Caracterizar y extraer metadatos", - "fr": "Caractériser et extraire les métadonnées", - "ja": "メタデータの特徴付けと抽出", - "no": "Karakteriser og hent ut metadata", - "pt_BR": "Caracterizar e extrair metadados", - "sv": "Karaktärisera och extrahera metadata" - } - }, "bd792750-a55b-42e9-903a-8c898bb77df1": { "config": { "@manager": "linkTaskManagerDirectories", @@ -9276,6 +9241,38 @@ "sv": "Byt namn på SIP-mappen med SIP UUID" } }, + "bf0ea0f6-211b-4b34-8f25-8a68145403c8": { + "config": { + "@manager": "linkTaskManagerFiles", + "@model": "StandardTaskConfig", + "arguments": "\"True\" \"%relativeLocation%\" \"%fileUUID%\" --disable-reidentify", + "execute": "identifyFileFormat_v0.0", + "filter_subdir": "objects/manualNormalization/preservation" + }, + "description": { + "en": "Identify file format", + "es": "Identificar formato de fichero", + "fr": "Identifier le format de fichier", + "no": "Identifiser filformat", + "pt_BR": "Identifique o formato do arquivo", + "sv": "Identifiera filformat" + }, + "exit_codes": { + "0": { + "job_status": "Completed successfully", + "link_id": "10c40e41-fb10-48b5-9d01-336cd958afe8" + } + }, + "fallback_job_status": "Failed", + "fallback_link_id": "10c40e41-fb10-48b5-9d01-336cd958afe8", + "group": { + "en": "Process manually normalized files", + "es": "Procesar manualmente ficheros normalizados", + "no": "Prosesser manuelt normaliserte filer", + "pt_BR": "Processar arquivos normalizados manualmente", + "sv": "Bearbeta manuellt normaliserade filer" + } + }, "c103b2fb-9a6b-4b68-8112-b70597a6cd14": { "config": { "@manager": "linkTaskManagerDirectories", @@ -10760,7 +10757,7 @@ "exit_codes": { "0": { "job_status": "Completed successfully", - "link_id": "10c40e41-fb10-48b5-9d01-336cd958afe8" + "link_id": "bf0ea0f6-211b-4b34-8f25-8a68145403c8" } }, "fallback_job_status": "Failed", diff --git a/src/dashboard/src/fpr/migrations/0044_remove_fits.py b/src/dashboard/src/fpr/migrations/0044_remove_fits.py new 
file mode 100644 index 000000000..85f53d009 --- /dev/null +++ b/src/dashboard/src/fpr/migrations/0044_remove_fits.py @@ -0,0 +1,12 @@ +from django.db import migrations + + +def data_migration(apps, schema_editor): + FPTool = apps.get_model("fpr", "FPTool") + FPTool.objects.filter(description="FITS").delete() + + +class Migration(migrations.Migration): + dependencies = [("fpr", "0043_update_default_thumbnail_command")] + + operations = [migrations.RunPython(data_migration, migrations.RunPython.noop)] From a78868e0725c3d41704a24765a6aabeffd7b4c24 Mon Sep 17 00:00:00 2001 From: Douglas Cerna Date: Thu, 12 Sep 2024 11:38:08 -0600 Subject: [PATCH 5/8] Escape output and error in launchSubProcess --- pyproject.toml | 2 + .../lib/executeOrRunSubProcess.py | 74 +++++++++++++------ .../test_execute_functions.py | 30 +++++++- 3 files changed, 79 insertions(+), 27 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 86d11bc48..37e416103 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,7 @@ warn_unused_configs = true [[tool.mypy.overrides]] module = [ + "src.archivematicaCommon.lib.executeOrRunSubProcess", "src.MCPClient.lib.client.*", "src.MCPClient.lib.clientScripts.characterize_file", "src.MCPClient.lib.clientScripts.has_packages", @@ -64,6 +65,7 @@ module = [ "src.MCPClient.lib.clientScripts.policy_check", "src.MCPClient.lib.clientScripts.transcribe_file", "src.MCPClient.lib.clientScripts.validate_file", + "tests.archivematicaCommon.test_execute_functions", "tests.dashboard.fpr.test_views", "tests.MCPClient.conftest", "tests.MCPClient.test_characterize_file", diff --git a/src/archivematicaCommon/lib/executeOrRunSubProcess.py b/src/archivematicaCommon/lib/executeOrRunSubProcess.py index 86a377890..0b8ada6cf 100644 --- a/src/archivematicaCommon/lib/executeOrRunSubProcess.py +++ b/src/archivematicaCommon/lib/executeOrRunSubProcess.py @@ -21,16 +21,29 @@ import subprocess import sys import tempfile +from typing import Dict +from typing import List +from typing import Optional +from typing import Tuple +from typing import Union + +from archivematicaFunctions import escape + +Arguments = List[str] +Input = Union[str, bytes, io.IOBase] +Environment = Dict[str, str] +Command = Union[str, List[str]] +Result = Tuple[int, str, str] def launchSubProcess( - command, - stdIn="", - printing=True, - arguments=None, - env_updates=None, - capture_output=False, -): + command: Command, + stdIn: Input = "", + printing: bool = True, + arguments: Optional[Arguments] = None, + env_updates: Optional[Environment] = None, + capture_output: bool = False, +) -> Result: """ Launches a subprocess using ``command``, where ``command`` is either: a) a single string containing a commandline statement, or @@ -89,7 +102,7 @@ def launchSubProcess( stdin_pipe = subprocess.PIPE communicate_input = stdIn elif isinstance(stdIn, io.IOBase): - stdin_pipe = stdIn + stdin_pipe = stdIn.fileno() communicate_input = None else: raise Exception("stdIn must be a string or a file object") @@ -103,8 +116,8 @@ def launchSubProcess( env=my_env, ) std_out, std_error = p.communicate(input=communicate_input) - stdOut = std_out.decode() - stdError = std_error.decode() + stdOut = escape(std_out) + stdError = escape(std_error) else: # Ignore the stdout of the subprocess, capturing only stderr with open(os.devnull, "w") as devnull: @@ -116,7 +129,7 @@ def launchSubProcess( stderr=subprocess.PIPE, ) __, std_error = p.communicate(input=communicate_input) - stdError = std_error.decode() + stdError = escape(std_error) retcode = p.returncode 
# If we are not capturing output and the subprocess has succeeded, set # its stderr to the empty string. @@ -139,8 +152,13 @@ def launchSubProcess( def createAndRunScript( - text, stdIn="", printing=True, arguments=None, env_updates=None, capture_output=True -): + text: Command, + stdIn: Input = "", + printing: bool = True, + arguments: Optional[Arguments] = None, + env_updates: Optional[Environment] = None, + capture_output: bool = True, +) -> Result: if arguments is None: arguments = [] if env_updates is None: @@ -150,7 +168,10 @@ def createAndRunScript( encoding="utf-8", mode="wt", delete=False ) as tmpfile: os.chmod(tmpfile.name, 0o770) - tmpfile.write(text) + if isinstance(text, str): + tmpfile.write(text) + else: + tmpfile.write(" ".join(text)) tmpfile.close() cmd = [tmpfile.name] cmd.extend(arguments) @@ -168,14 +189,14 @@ def createAndRunScript( def executeOrRun( - type, - text, - stdIn="", - printing=True, - arguments=None, - env_updates=None, - capture_output=True, -): + type: str, + text: Command, + stdIn: Input = "", + printing: bool = True, + arguments: Optional[Arguments] = None, + env_updates: Optional[Environment] = None, + capture_output: bool = True, +) -> Result: """ Attempts to run the provided command on the shell, with the text of "stdIn" passed as standard input if provided. The type parameter @@ -220,7 +241,9 @@ def executeOrRun( capture_output=capture_output, ) if type == "bashScript": - text = "#!/bin/bash\n" + text + if not isinstance(text, str): + raise ValueError("command must be a str") + text = f"#!/bin/bash\n{text}" return createAndRunScript( text, stdIn=stdIn, @@ -230,7 +253,9 @@ def executeOrRun( capture_output=capture_output, ) if type == "pythonScript": - text = "#!/usr/bin/env python\n" + text + if not isinstance(text, str): + raise ValueError("command must be a str") + text = f"#!/usr/bin/env python\n{text}" return createAndRunScript( text, stdIn=stdIn, @@ -248,3 +273,4 @@ def executeOrRun( env_updates=env_updates, capture_output=capture_output, ) + raise ValueError(f"unknown type {type}") diff --git a/tests/archivematicaCommon/test_execute_functions.py b/tests/archivematicaCommon/test_execute_functions.py index 0effb2719..0f1fa41b9 100644 --- a/tests/archivematicaCommon/test_execute_functions.py +++ b/tests/archivematicaCommon/test_execute_functions.py @@ -1,13 +1,16 @@ +import pathlib import shlex import tempfile +from typing import Generator from unittest.mock import ANY +from unittest.mock import Mock from unittest.mock import patch import executeOrRunSubProcess as execsub import pytest -def test_capture_output(): +def test_capture_output() -> None: """Tests behaviour of capture_output when executing sub processes.""" # Test that stdout and stderr are not captured by default @@ -61,7 +64,7 @@ def test_capture_output(): @pytest.fixture -def temp_path(tmp_path): +def temp_path(tmp_path: pathlib.Path) -> Generator[str, None, None]: """Creates custom temp path, yields the value, and resets to original value.""" original_tempdir = tempfile.tempdir @@ -73,7 +76,9 @@ def temp_path(tmp_path): @patch("executeOrRunSubProcess.launchSubProcess") -def test_createAndRunScript_creates_tmpfile_in_custom_dir(launchSubProcess, temp_path): +def test_createAndRunScript_creates_tmpfile_in_custom_dir( + launchSubProcess: Mock, temp_path: str +) -> None: """Tests execution of launchSubProcess when executing createAndRunScript.""" script_content = "#!/bin/bash\necho 'Script output'\nexit 0" @@ -89,3 +94,22 @@ def 
test_createAndRunScript_creates_tmpfile_in_custom_dir(launchSubProcess, temp
     )
     args, _ = launchSubProcess.call_args
     assert args[0][0].startswith(temp_path)
+
+
+@patch("subprocess.Popen")
+def test_launchSubProcess_replaces_non_utf8_output_with_replacement_characters(
+    popen: Mock,
+) -> None:
+    communicate_return_code = 0
+    communicate_output = b"Output \xae"
+    communicate_error = b"Error \xae"
+    popen.return_value = Mock(
+        returncode=communicate_return_code,
+        **{"communicate.return_value": (communicate_output, communicate_error)},
+    )
+
+    code, stdout, stderr = execsub.launchSubProcess("mycommand", capture_output=True)
+
+    assert code == communicate_return_code
+    assert stdout == communicate_output.decode(errors="replace")
+    assert stderr == communicate_error.decode(errors="replace")

From 36738c9a0703cb5bc71dff6311c6191efda4c20f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jes=C3=BAs=20Garc=C3=ADa=20Crespo?=
Date: Fri, 30 Aug 2024 05:57:26 +0000
Subject: [PATCH 6/8] Add missing settings fixture to normalize test

This change ensures that the test does not attempt to write to
`/var/archivematica` but to a temporary directory created by the pytest
fixture.
---
 tests/MCPClient/test_normalize.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/MCPClient/test_normalize.py b/tests/MCPClient/test_normalize.py
index 6da999e71..025a34d5a 100644
--- a/tests/MCPClient/test_normalize.py
+++ b/tests/MCPClient/test_normalize.py
@@ -692,6 +692,7 @@ def test_normalization_fallbacks_to_default_thumbnail_rule_if_initial_command_fa
     fprule_thumbnail: fprmodels.FPRule,
     fpcommand_thumbnail: fprmodels.FPCommand,
     fprule_default_thumbnail: fprmodels.FPRule,
+    settings: pytest_django.fixtures.SettingsWrapper,
 ) -> None:
     expected_thumbnail_content = b"thumbnail image content"
     expected_thumbnail_path = (

From c353a38b6f5a5f47071d0eb6357e58be34bcc8c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jes=C3=BAs=20Garc=C3=ADa=20Crespo?=
Date: Fri, 30 Aug 2024 06:35:40 +0000
Subject: [PATCH 7/8] Fix integrity errors in reingest tests

This commit fixes some integrity errors in test_reingest_mets when
loading fixtures with invalid foreign keys. It is unclear why these
errors are not reproducible in the Docker environment.

FAILED tests/test_reingest_mets.py::TestAddingNewFiles::test_add_metadata_csv - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '1' has an invalid foreign key: Events_agents.agent_id contains a value '1' that does not have a corresponding value in Agents.pk.
FAILED tests/test_reingest_mets.py::TestAddingNewFiles::test_no_new_files - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '1' has an invalid foreign key: Events_agents.agent_id contains a value '1' that does not have a corresponding value in Agents.pk.
FAILED tests/test_reingest_mets.py::TestAddingNewFiles::test_new_metadata_file_in_subdir - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '1' has an invalid foreign key: Events_agents.agent_id contains a value '1' that does not have a corresponding value in Agents.pk.
FAILED tests/test_reingest_mets.py::TestAddingNewFiles::test_new_preservation_file - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '1' has an invalid foreign key: Events_agents.agent_id contains a value '1' that does not have a corresponding value in Agents.pk.
FAILED tests/test_reingest_mets.py::TestUpdateObject::test_update_file_id - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '2' has an invalid foreign key: Events_agents.agent_id contains a value '3' that does not have a corresponding value in Agents.pk. FAILED tests/test_reingest_mets.py::TestUpdateObject::test_update_all - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '2' has an invalid foreign key: Events_agents.agent_id contains a value '3' that does not have a corresponding value in Agents.pk. FAILED tests/test_reingest_mets.py::TestUpdateObject::test_update_checksum_type - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '2' has an invalid foreign key: Events_agents.agent_id contains a value '3' that does not have a corresponding value in Agents.pk. FAILED tests/test_reingest_mets.py::TestUpdateObject::test_update_preservation_derivative - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '2' has an invalid foreign key: Events_agents.agent_id contains a value '3' that does not have a corresponding value in Agents.pk. FAILED tests/test_reingest_mets.py::TestUpdateObject::test_object_not_updated - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '2' has an invalid foreign key: Events_agents.agent_id contains a value '3' that does not have a corresponding value in Agents.pk. FAILED tests/test_reingest_mets.py::TestUpdateObject::test_update_characterization - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '2' has an invalid foreign key: Events_agents.agent_id contains a value '3' that does not have a corresponding value in Agents.pk. FAILED tests/test_reingest_mets.py::TestUpdateObject::test__update_premis_object - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '2' has an invalid foreign key: Events_agents.agent_id contains a value '3' that does not have a corresponding value in Agents.pk. FAILED tests/test_reingest_mets.py::TestDeleteFiles::test_delete_file - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '2' has an invalid foreign key: Events_agents.agent_id contains a value '3' that does not have a corresponding value in Agents.pk. 
--- tests/MCPClient/fixtures/reingest-checksum.json | 2 +- tests/MCPClient/fixtures/reingest-file-id.json | 2 +- .../MCPClient/fixtures/reingest-preservation.json | 8 ++++---- tests/MCPClient/test_reingest_mets.py | 14 ++++++++++++-- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/tests/MCPClient/fixtures/reingest-checksum.json b/tests/MCPClient/fixtures/reingest-checksum.json index 7ed18d3ea..b275e842c 100644 --- a/tests/MCPClient/fixtures/reingest-checksum.json +++ b/tests/MCPClient/fixtures/reingest-checksum.json @@ -12,7 +12,7 @@ "fields": { "event_type": "message digest calculation", "event_id": "5a561b24-cc7e-4032-b005-f75de2ec558a", - "agents": [1,2], + "agents": [2,3], "event_detail": "program=\"python\"; module=\"hashlib.md5()\"", "file_uuid": "ae8d4290-fe52-4954-b72a-0f591bee2e2f", "event_outcome_detail": "ac63a92ba5a94c337e740d6f189200d0", diff --git a/tests/MCPClient/fixtures/reingest-file-id.json b/tests/MCPClient/fixtures/reingest-file-id.json index 4acb49352..98181bf19 100644 --- a/tests/MCPClient/fixtures/reingest-file-id.json +++ b/tests/MCPClient/fixtures/reingest-file-id.json @@ -5,7 +5,7 @@ "fields": { "event_type": "format identification", "event_id": "44455720-d9a7-43a4-90b6-a9cdf6e5c8cc", - "agents": [1,2,3], + "agents": [2,3], "event_detail": "program=\"Fido\"; version=\"1.2\"", "file_uuid": "ae8d4290-fe52-4954-b72a-0f591bee2e2f", "event_outcome_detail": "fmt/9000", diff --git a/tests/MCPClient/fixtures/reingest-preservation.json b/tests/MCPClient/fixtures/reingest-preservation.json index a9073ee37..aec5c244c 100644 --- a/tests/MCPClient/fixtures/reingest-preservation.json +++ b/tests/MCPClient/fixtures/reingest-preservation.json @@ -26,7 +26,7 @@ "fields": { "event_type": "normalization", "event_id": "291f9be4-d19a-4bcc-8e1c-d3f01e4a48b1", - "agents": [1,2,3], + "agents": [2,3], "event_detail": "ArchivematicaFPRCommandID=\"a34ddc9b-c922-4bb6-8037-bbe713332175\"; program=\"convert\"; version=\"Version: ImageMagick 6.7.7-10 2014-03-06 Q16 http://www.imagemagick.org\"\n", "file_uuid": "ae8d4290-fe52-4954-b72a-0f591bee2e2f", "event_outcome_detail": "%SIPDirectory%objects/evelyn_s_photo-d8cc7af7-284a-42f5-b7f4-e181a0efc35f.tif", @@ -59,7 +59,7 @@ "fields": { "event_type": "creation", "event_id": "a89e6b45-1ac0-49cc-9dda-a4d11ed63f2f", - "agents": [1,2,3], + "agents": [2,3], "event_detail": "", "file_uuid": "d8cc7af7-284a-42f5-b7f4-e181a0efc35f", "event_outcome_detail": "", @@ -73,7 +73,7 @@ "fields": { "event_type": "message digest calculation", "event_id": "5c505f21-4e9a-49aa-b7dd-ed699fd4f8ef", - "agents": [1,2,3], + "agents": [2,3], "event_detail": "program=\"python\"; module=\"hashlib.sha256()\"", "file_uuid": "d8cc7af7-284a-42f5-b7f4-e181a0efc35f", "event_outcome_detail": "d82448f154b9185bc777ecb0a3602760eb76ba85dd3098f073b2c91a03f571e9", @@ -87,7 +87,7 @@ "fields": { "event_type": "fixity check", "event_id": "94ca2dee-b136-4a74-b477-a0b938bb49e9", - "agents": [1,2,3], + "agents": [2,3], "event_detail": "program=\"python\"; module=\"hashlib.sha256()\"", "file_uuid": "d8cc7af7-284a-42f5-b7f4-e181a0efc35f", "event_outcome_detail": "d82448f154b9185bc777ecb0a3602760eb76ba85dd3098f073b2c91a03f571e9 verified", diff --git a/tests/MCPClient/test_reingest_mets.py b/tests/MCPClient/test_reingest_mets.py index 31280eb8f..b3a4d99fa 100644 --- a/tests/MCPClient/test_reingest_mets.py +++ b/tests/MCPClient/test_reingest_mets.py @@ -28,7 +28,12 @@ class TestUpdateObject(TestCase): """Test updating the PREMIS:OBJECT in the techMD. 
(update_object).""" - fixture_files = ["sip-reingest.json", "files.json", "events-reingest.json"] + fixture_files = [ + "agents.json", + "sip-reingest.json", + "files.json", + "events-reingest.json", + ] fixtures = [os.path.join(FIXTURES_DIR, p) for p in fixture_files] def setUp(self): @@ -1839,7 +1844,12 @@ def test_new_preservation_file(self): class TestDeleteFiles(TestCase): """Test marking files in the METS as deleted. (delete_files)""" - fixture_files = ["sip-reingest.json", "files.json", "events-reingest.json"] + fixture_files = [ + "agents.json", + "sip-reingest.json", + "files.json", + "events-reingest.json", + ] fixtures = [os.path.join(FIXTURES_DIR, p) for p in fixture_files] sip_uuid = "4060ee97-9c3f-4822-afaf-ebdf838284c3" From d6b3869ca742c6be955e4bd34f640d59949941b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs=20Garc=C3=ADa=20Crespo?= Date: Fri, 30 Aug 2024 07:04:29 +0000 Subject: [PATCH 8/8] Update bagit-python Use the dev/am-117 branch of the bagit-python fork which includes a fix for environments where pkg_resources is not available, i.e. Python 3.12 where setuptools is not bundled, even though we're currently listing setuptools as a dependency of zope (used by gevent). --- requirements-dev.txt | 2 +- requirements.in | 2 +- requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 3e3453b17..ed2d77877 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -19,7 +19,7 @@ attrs==24.2.0 # -r requirements.txt # jsonschema # referencing -bagit @ git+https://github.com/artefactual-labs/bagit-python.git@4b8fde73b4e631461bfd7add87e200500d40ca21 +bagit @ git+https://github.com/artefactual-labs/bagit-python.git@902051d8410219f6c5f4ce6d43e5b272cf29e89b # via -r requirements.txt brotli==1.1.0 # via -r requirements.txt diff --git a/requirements.in b/requirements.in index 7b5744aaf..4393cd65f 100644 --- a/requirements.in +++ b/requirements.in @@ -3,7 +3,7 @@ Django>=4.2,<5 agentarchives amclient ammcpc -git+https://github.com/artefactual-labs/bagit-python.git@4b8fde73b4e631461bfd7add87e200500d40ca21#egg=bagit +git+https://github.com/artefactual-labs/bagit-python.git@902051d8410219f6c5f4ce6d43e5b272cf29e89b#egg=bagit brotli clamd django-autoslug diff --git a/requirements.txt b/requirements.txt index dbf479dc6..28c3e1fcc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,7 +16,7 @@ attrs==24.2.0 # via # jsonschema # referencing -bagit @ git+https://github.com/artefactual-labs/bagit-python.git@4b8fde73b4e631461bfd7add87e200500d40ca21 +bagit @ git+https://github.com/artefactual-labs/bagit-python.git@902051d8410219f6c5f4ce6d43e5b272cf29e89b # via -r requirements.in brotli==1.1.0 # via -r requirements.in