From 48d3c1140b6b2dba9dae4f99c1edae8a7bbb7ebc Mon Sep 17 00:00:00 2001 From: Douglas Cerna Date: Thu, 29 Aug 2024 14:16:26 -0600 Subject: [PATCH 1/8] Prefetch related models in FPR views --- pyproject.toml | 1 + src/dashboard/src/fpr/forms.py | 22 +- src/dashboard/src/fpr/views.py | 22 +- tests/dashboard/fpr/test_views.py | 326 +++++++++++++++++++----------- 4 files changed, 244 insertions(+), 127 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 34d52fe0e..86d11bc48 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,6 +64,7 @@ module = [ "src.MCPClient.lib.clientScripts.policy_check", "src.MCPClient.lib.clientScripts.transcribe_file", "src.MCPClient.lib.clientScripts.validate_file", + "tests.dashboard.fpr.test_views", "tests.MCPClient.conftest", "tests.MCPClient.test_characterize_file", "tests.MCPClient.test_has_packages", diff --git a/src/dashboard/src/fpr/forms.py b/src/dashboard/src/fpr/forms.py index 39ee6a0f1..e2e23a2ba 100644 --- a/src/dashboard/src/fpr/forms.py +++ b/src/dashboard/src/fpr/forms.py @@ -79,8 +79,14 @@ class IDRuleForm(forms.ModelForm): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # Limit to only enabled formats/commands - self.fields["format"].queryset = fprmodels.FormatVersion.active.all() - self.fields["command"].queryset = fprmodels.IDCommand.active.all() + self.fields[ + "format" + ].queryset = fprmodels.FormatVersion.active.all().prefetch_related( + "format__group" + ) + self.fields[ + "command" + ].queryset = fprmodels.IDCommand.active.all().prefetch_related("tool") class Meta: model = fprmodels.IDRule @@ -110,7 +116,11 @@ def __init__(self, *args, **kwargs): self.fields["command"].initial = self.instance.command.uuid # Show only active format versions in the format dropdown - self.fields["format"].queryset = fprmodels.FormatVersion.active.all() + self.fields[ + "format" + ].queryset = fprmodels.FormatVersion.active.all().prefetch_related( + "format__group" + ) def clean(self): cleaned_data = super().clean() @@ -168,6 +178,12 @@ class FPCommandForm(forms.ModelForm): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + self.fields[ + "output_format" + ].queryset = fprmodels.FormatVersion.active.all().prefetch_related( + "format__group" + ) + verification_commands = fprmodels.FPCommand.active.filter( command_usage="verification" ) diff --git a/src/dashboard/src/fpr/views.py b/src/dashboard/src/fpr/views.py index 378b9dd58..7b15734ba 100644 --- a/src/dashboard/src/fpr/views.py +++ b/src/dashboard/src/fpr/views.py @@ -317,7 +317,9 @@ def idrule_list(request): "replaces_id" ) ] - idrules = fprmodels.IDRule.objects.exclude(uuid__in=replacing_rules) + idrules = fprmodels.IDRule.objects.exclude( + uuid__in=replacing_rules + ).prefetch_related("format__format__group", "command") return render(request, "fpr/idrule/list.html", context(locals())) @@ -391,7 +393,9 @@ def idcommand_list(request): "replaces_id" ) ] - idcommands = fprmodels.IDCommand.objects.exclude(uuid__in=replacing_commands) + idcommands = fprmodels.IDCommand.objects.exclude( + uuid__in=replacing_commands + ).prefetch_related("tool") return render(request, "fpr/idcommand/list.html", context(locals())) @@ -493,7 +497,11 @@ def fprule_list(request, usage=None): else: opts = {} # Display disabled rules as long as they aren't replaced by another rule - fprules = fprmodels.FPRule.objects.filter(**opts).exclude(uuid__in=replacing_rules) + fprules = ( + fprmodels.FPRule.objects.filter(**opts) + .exclude(uuid__in=replacing_rules) + 
.prefetch_related("format__format__group", "command") + ) return render(request, "fpr/fprule/list.html", context(locals())) @@ -621,8 +629,10 @@ def fpcommand_list(request, usage=None): "replaces_id" ) ] - fpcommands = fprmodels.FPCommand.objects.filter(**opts).exclude( - uuid__in=replacing_commands + fpcommands = ( + fprmodels.FPCommand.objects.filter(**opts) + .exclude(uuid__in=replacing_commands) + .prefetch_related("tool") ) return render(request, "fpr/fpcommand/list.html", context(locals())) @@ -641,7 +651,7 @@ def fpcommand_edit(request, uuid=None): title = _("Replace command %(name)s") % {"name": fpcommand.description} else: fpcommand = None - title = _("Create format version") + title = _("Create format policy command") if request.method == "POST": form = fprforms.FPCommandForm(request.POST, instance=fpcommand) if form.is_valid(): diff --git a/tests/dashboard/fpr/test_views.py b/tests/dashboard/fpr/test_views.py index 3f1882207..6b246d21f 100644 --- a/tests/dashboard/fpr/test_views.py +++ b/tests/dashboard/fpr/test_views.py @@ -1,122 +1,129 @@ +import uuid + import pytest from components import helpers -from django.contrib.auth.models import User -from django.test import TestCase +from django.test import Client from django.urls import reverse -from fpr.models import Format -from fpr.models import FormatGroup -from fpr.models import FPCommand -from fpr.models import FPTool -from fpr.models import IDTool - - -class TestViews(TestCase): - def setUp(self): - user = User.objects.create_superuser("demo", "demo@example.com", "demo") - self.client.login(username=user.username, password="demo") - helpers.set_setting("dashboard_uuid", "test-uuid") - - def test_idcommand_create(self): - url = reverse("fpr:idcommand_create") - tool = IDTool.objects.create( - uuid="37f3bd7c-bb24-4899-b7c4-785ff1c764ac", - description="Foobar", - version="v1.2.3", - ) - - resp = self.client.get(url) - self.assertEqual(resp.context["form"].initial["tool"], None) - - resp = self.client.get(url, {"parent": "c80458d9-2b62-40f4-b61c-936bfb72901d"}) - self.assertEqual(resp.context["form"].initial["tool"], None) - - resp = self.client.get(url, {"parent": tool.uuid}) - self.assertEqual(resp.context["form"].initial["tool"], tool) - - def test_fpcommand_create(self): - url = reverse("fpr:fpcommand_create") - tool = FPTool.objects.create( - uuid="37f3bd7c-bb24-4899-b7c4-785ff1c764ac", - description="Foobar", - version="v1.2.3", - ) - - resp = self.client.get(url) - self.assertEqual(resp.context["form"].initial["tool"], None) - - resp = self.client.get(url, {"parent": "d993bdcf-a944-4df8-b960-1b20c14ffe68"}) - self.assertEqual(resp.context["form"].initial["tool"], None) - - resp = self.client.get(url, {"parent": tool.uuid}) - self.assertEqual(resp.context["form"].initial["tool"], tool) - - def test_fpcommand_edit(self): - fpcommand_id = "41112047-7ddf-4bf0-9156-39fe96b32d53" - url = reverse("fpr:fpcommand_edit", args=[fpcommand_id]) - - fpcommand = FPCommand.active.get(uuid=fpcommand_id) - self.assertEqual(fpcommand.description, "Copying file to access directory") - - form_data = { - "verification_command": ["ef3ea000-0c3c-4cae-adc2-aa2a6ccbffce"], - "description": ["new description"], - "tool": ["0efc346e-6373-4799-819d-17cc0f21f827"], - "event_detail_command": [""], - "output_location": [ - "%outputDirectory%%prefix%%fileName%%postfix%%fileExtensionWithDot%" - ], - "command_usage": ["normalization"], - "command": [ - 'cp -R "%inputFile%" "%outputDirectory%%prefix%%fileName%%postfix%%fileExtensionWithDot%"' - ], - 
"csrfmiddlewaretoken": [ - "k5UUufiJuSOLNOGJYlU2ODow5iKPhOuLc9Q0EmUoIXsQLZ7r5Ede7Pf0pSQEm0lP" - ], - "output_format": ["0ab4cd40-90e7-4d75-b294-498177b3897d"], - "script_type": ["command"], - } - resp = self.client.post(url, follow=True, data=form_data) - self.assertEqual(resp.status_code, 200) - - # Our fpcommand is now expected to be disabled. - fpcommand = FPCommand.objects.get(uuid=fpcommand_id) - self.assertEqual(fpcommand.enabled, False) - - # And replaced by a new fpcommand. - fpcommand = FPCommand.active.get(replaces_id=fpcommand_id) - self.assertEqual(fpcommand.description, "new description") - - def test_fpcommand_delete(self): - fpcommand_id = "0fd7935a-ed0d-4f67-aa25-1b44684f6aca" - url = reverse("fpr:fpcommand_delete", args=[fpcommand_id]) - - self.assertEqual(FPCommand.active.filter(uuid=fpcommand_id).exists(), True) - - resp = self.client.post(url, follow=True, data={"disable": True}) - - self.assertEqual(resp.status_code, 200) - self.assertEqual(FPCommand.active.filter(uuid=fpcommand_id).exists(), False) - - def test_fpcommand_revisions(self): - fpcommand_id = "cb335c49-e6ce-445f-a774-494a6f2300c6" - url = reverse("fpr:revision_list", args=["fpcommand", fpcommand_id]) - fpcommand = FPCommand.active.get(uuid=fpcommand_id) - - resp = self.client.get(url, follow=True) - - # Assert that the revision list shows multiple instances. - self.assertContains(resp, fpcommand.uuid) - self.assertContains(resp, fpcommand.replaces_id) +from fpr import models + + +@pytest.fixture +def dashboard_uuid() -> None: + helpers.set_setting("dashboard_uuid", str(uuid.uuid4())) + + +@pytest.mark.django_db +def test_idcommand_create(dashboard_uuid: None, admin_client: Client) -> None: + url = reverse("fpr:idcommand_create") + tool = models.IDTool.objects.create( + uuid="37f3bd7c-bb24-4899-b7c4-785ff1c764ac", + description="Foobar", + version="v1.2.3", + ) + + resp = admin_client.get(url) + assert resp.context["form"].initial["tool"] is None + + resp = admin_client.get(url, {"parent": str(uuid.uuid4())}) + assert resp.context["form"].initial["tool"] is None + + resp = admin_client.get(url, {"parent": tool.uuid}) + assert resp.context["form"].initial["tool"] == tool + + +@pytest.mark.django_db +def test_fpcommand_create(dashboard_uuid: None, admin_client: Client) -> None: + url = reverse("fpr:fpcommand_create") + tool = models.FPTool.objects.create( + uuid="37f3bd7c-bb24-4899-b7c4-785ff1c764ac", + description="Foobar", + version="v1.2.3", + ) + + resp = admin_client.get(url) + assert resp.context["form"].initial["tool"] is None + + resp = admin_client.get(url, {"parent": str(uuid.uuid4())}) + assert resp.context["form"].initial["tool"] is None + + resp = admin_client.get(url, {"parent": tool.uuid}) + assert resp.context["form"].initial["tool"] == tool + + +@pytest.mark.django_db +def test_fpcommand_edit(dashboard_uuid: None, admin_client: Client) -> None: + fpcommand_id = "41112047-7ddf-4bf0-9156-39fe96b32d53" + url = reverse("fpr:fpcommand_edit", args=[fpcommand_id]) + + fpcommand = models.FPCommand.active.get(uuid=fpcommand_id) + assert fpcommand.description == "Copying file to access directory" + + form_data = { + "verification_command": ["ef3ea000-0c3c-4cae-adc2-aa2a6ccbffce"], + "description": ["new description"], + "tool": ["0efc346e-6373-4799-819d-17cc0f21f827"], + "event_detail_command": [""], + "output_location": [ + "%outputDirectory%%prefix%%fileName%%postfix%%fileExtensionWithDot%" + ], + "command_usage": ["normalization"], + "command": [ + 'cp -R "%inputFile%" 
"%outputDirectory%%prefix%%fileName%%postfix%%fileExtensionWithDot%"' + ], + "csrfmiddlewaretoken": [ + "k5UUufiJuSOLNOGJYlU2ODow5iKPhOuLc9Q0EmUoIXsQLZ7r5Ede7Pf0pSQEm0lP" + ], + "output_format": ["0ab4cd40-90e7-4d75-b294-498177b3897d"], + "script_type": ["command"], + } + resp = admin_client.post(url, follow=True, data=form_data) + assert resp.status_code == 200 + + # Our fpcommand is now expected to be disabled. + fpcommand = models.FPCommand.objects.get(uuid=fpcommand_id) + assert not fpcommand.enabled + + # And replaced by a new fpcommand. + fpcommand = models.FPCommand.active.get(replaces_id=fpcommand_id) + assert fpcommand.description == "new description" + + +@pytest.mark.django_db +def test_fpcommand_delete(dashboard_uuid: None, admin_client: Client) -> None: + fpcommand_id = "0fd7935a-ed0d-4f67-aa25-1b44684f6aca" + url = reverse("fpr:fpcommand_delete", args=[fpcommand_id]) + + assert models.FPCommand.active.filter(uuid=fpcommand_id).exists() + + resp = admin_client.post(url, follow=True, data={"disable": True}) + + assert resp.status_code == 200 + assert not models.FPCommand.active.filter(uuid=fpcommand_id).exists() + + +@pytest.mark.django_db +def test_fpcommand_revisions(dashboard_uuid: None, admin_client: Client) -> None: + fpcommand_id = "cb335c49-e6ce-445f-a774-494a6f2300c6" + url = reverse("fpr:revision_list", args=["fpcommand", fpcommand_id]) + fpcommand = models.FPCommand.active.get(uuid=fpcommand_id) + + resp = admin_client.get(url, follow=True) + + # Assert that the revision list shows multiple instances. + content = resp.content.decode() + assert str(fpcommand.uuid) in content + assert str(fpcommand.replaces_id) in content @pytest.mark.django_db -def test_format_create_creates_format(admin_client): - helpers.set_setting("dashboard_uuid", "test-uuid") +def test_format_create_creates_format( + dashboard_uuid: None, admin_client: Client +) -> None: # Add a new format to the Unknown group. - unknown_group = FormatGroup.objects.get(description="Unknown") + unknown_group = models.FormatGroup.objects.get(description="Unknown") format_description = "My test format" - assert Format.objects.filter(description=format_description).count() == 0 + assert models.Format.objects.filter(description=format_description).count() == 0 response = admin_client.post( reverse("fpr:format_create"), @@ -129,7 +136,7 @@ def test_format_create_creates_format(admin_client): assert "Saved" in content assert "Format My test format" in content assert ( - Format.objects.filter( + models.Format.objects.filter( description=format_description, group=unknown_group ).count() == 1 @@ -137,20 +144,19 @@ def test_format_create_creates_format(admin_client): @pytest.mark.django_db -def test_format_edit_updates_format(admin_client): - helpers.set_setting("dashboard_uuid", "test-uuid") +def test_format_edit_updates_format(dashboard_uuid: None, admin_client: Client) -> None: # Get details of the Matroska format from the Video group. - video_group = FormatGroup.objects.get(description="Video") - format = Format.objects.get(description="Matroska", group=video_group) + video_group = models.FormatGroup.objects.get(description="Video") + format = models.Format.objects.get(description="Matroska", group=video_group) format_uuid = format.uuid format_slug = format.slug # Update the group and description of the Matroska format. 
- unknown_group = FormatGroup.objects.get(description="Unknown") + unknown_group = models.FormatGroup.objects.get(description="Unknown") new_format_description = "My matroska format" assert ( - Format.objects.filter( + models.Format.objects.filter( description=new_format_description, group=unknown_group ).count() == 0 @@ -167,7 +173,7 @@ def test_format_edit_updates_format(admin_client): assert "Saved" in content assert "Format My matroska format" in content assert ( - Format.objects.filter( + models.Format.objects.filter( uuid=format_uuid, slug=format_slug, description=new_format_description, @@ -175,3 +181,87 @@ def test_format_edit_updates_format(admin_client): ).count() == 1 ) + + +@pytest.mark.django_db +def test_idrule_create(dashboard_uuid: None, admin_client: Client) -> None: + url = reverse("fpr:idrule_create") + + resp = admin_client.get(url) + + assert resp.context["form"].initial == {} + assert "Create identification rule" in resp.content.decode() + + format_version = models.FormatVersion.objects.create( + format=models.Format.objects.create( + group=models.FormatGroup.objects.create(description="Group"), + description="Format", + ), + description="Format version", + ) + command = models.IDCommand.objects.create( + tool=models.IDTool.objects.create(description="Tool") + ) + command_output = ".ppt" + + resp = admin_client.post( + url, + { + "format": format_version.uuid, + "command": command.uuid, + "command_output": command_output, + }, + follow=True, + ) + + assert "Saved." in resp.content.decode() + assert ( + models.IDRule.objects.filter( + format=format_version.uuid, + command=command.uuid, + command_output=command_output, + ).count() + == 1 + ) + + +@pytest.mark.django_db +def test_fprule_create(dashboard_uuid: None, admin_client: Client) -> None: + url = reverse("fpr:fprule_create") + + resp = admin_client.get(url) + + assert resp.context["form"].initial == {} + assert "Create format policy rule" in resp.content.decode() + + purpose = models.FPRule.CHARACTERIZATION + format_version = models.FormatVersion.objects.create( + format=models.Format.objects.create( + group=models.FormatGroup.objects.create(description="Group"), + description="Format", + ), + description="Format version", + ) + command = models.FPCommand.objects.create( + tool=models.FPTool.objects.create(description="Tool") + ) + + resp = admin_client.post( + url, + { + "f-purpose": purpose, + "f-format": format_version.uuid, + "f-command": command.uuid, + }, + follow=True, + ) + + assert "Saved." 
in resp.content.decode() + assert ( + models.FPRule.objects.filter( + purpose=purpose, + format=format_version.uuid, + command=command.uuid, + ).count() + == 1 + ) From 09418abbcb3c4144eb8abbea914c31370ad33df6 Mon Sep 17 00:00:00 2001 From: "Douglas Cerna (Soy Douglas)" Date: Thu, 5 Sep 2024 15:34:56 +0000 Subject: [PATCH 2/8] Upgrade Python requirements --- requirements-dev.txt | 12 ++++++------ requirements.txt | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 2e008dd19..3e3453b17 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -27,11 +27,11 @@ build==1.2.1 # via pip-tools cachetools==5.5.0 # via tox -certifi==2024.7.4 +certifi==2024.8.30 # via # -r requirements.txt # requests -cffi==1.17.0 +cffi==1.17.1 # via # -r requirements.txt # cryptography @@ -51,7 +51,7 @@ coverage[toml]==7.6.1 # via # -r requirements-dev.in # pytest-cov -cryptography==43.0.0 +cryptography==43.0.1 # via # -r requirements.txt # josepy @@ -59,7 +59,7 @@ cryptography==43.0.0 # pyopenssl distlib==0.3.8 # via virtualenv -django==4.2.15 +django==4.2.16 # via # -r requirements.txt # django-auth-ldap @@ -209,7 +209,7 @@ pytest==8.3.2 # pytest-randomly pytest-cov==5.0.0 # via -r requirements-dev.in -pytest-django==4.8.0 +pytest-django==4.9.0 # via -r requirements-dev.in pytest-mock==3.14.0 # via -r requirements-dev.in @@ -305,7 +305,7 @@ zope-interface==7.0.3 # The following packages are considered to be unsafe in a requirements file: pip==24.2 # via pip-tools -setuptools==74.0.0 +setuptools==74.1.2 # via # -r requirements.txt # pip-tools diff --git a/requirements.txt b/requirements.txt index d7b5c8437..dbf479dc6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,20 +20,20 @@ bagit @ git+https://github.com/artefactual-labs/bagit-python.git@4b8fde73b4e6314 # via -r requirements.in brotli==1.1.0 # via -r requirements.in -certifi==2024.7.4 +certifi==2024.8.30 # via requests -cffi==1.17.0 +cffi==1.17.1 # via cryptography charset-normalizer==3.3.2 # via requests clamd==1.0.2 # via -r requirements.in -cryptography==43.0.0 +cryptography==43.0.1 # via # josepy # mozilla-django-oidc # pyopenssl -django==4.2.15 +django==4.2.16 # via # -r requirements.in # django-auth-ldap @@ -172,7 +172,7 @@ zope-interface==7.0.3 # via gevent # The following packages are considered to be unsafe in a requirements file: -setuptools==74.0.0 +setuptools==74.1.2 # via # zope-event # zope-interface From f99179666441c0e452fccf34e10275278d9c1b63 Mon Sep 17 00:00:00 2001 From: "Douglas Cerna (Soy Douglas)" Date: Tue, 10 Sep 2024 20:53:18 +0000 Subject: [PATCH 3/8] Add manual normalization AMAUAT to test matrix --- .github/workflows/acceptance-test.yml | 1 + hack/submodules/archivematica-acceptance-tests | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/acceptance-test.yml b/.github/workflows/acceptance-test.yml index cf6e886b9..255217160 100644 --- a/.github/workflows/acceptance-test.yml +++ b/.github/workflows/acceptance-test.yml @@ -25,6 +25,7 @@ jobs: - "icc" - "ipc preservation" - "ipc access" + - "man-norm" - "metadata-xml" - "tcc" - "tpc" diff --git a/hack/submodules/archivematica-acceptance-tests b/hack/submodules/archivematica-acceptance-tests index d44098ee2..e5887b48d 160000 --- a/hack/submodules/archivematica-acceptance-tests +++ b/hack/submodules/archivematica-acceptance-tests @@ -1 +1 @@ -Subproject commit d44098ee288fff0a4adb5c35b9f094ba04fa9cda +Subproject commit e5887b48d695b2a8050c828d167807699ad8f457 
From 0212db1978af3eb390ebdec205598ce4b2cc4fa9 Mon Sep 17 00:00:00 2001 From: Douglas Cerna Date: Wed, 11 Sep 2024 07:41:22 -0600 Subject: [PATCH 4/8] Remove FITS This removes: * the fits service and its package dependencies from the Compose environment * the FITS_v0.0 client script from the MCPClient and updates the MCPServer workflow to use the identifyFileFormat_v0.0 and characterizeFile_v0.0 scripts for manually normalized preservation files * the FPR model instances (FPTool, FPCommand and FPRule) related to FITS with a database data migration in the fpr application of the Dashboard --- hack/Dockerfile | 2 - hack/README.md | 2 - hack/docker-compose.yml | 11 -- src/MCPClient/lib/archivematicaClientModules | 1 - .../lib/clientScripts/characterize_file.py | 2 +- src/MCPClient/lib/clientScripts/fits.py | 124 ------------------ ...ization_create_metadata_and_restructure.py | 7 +- ..._normalization_move_access_files_to_dip.py | 2 +- src/MCPServer/lib/assets/workflow.json | 89 ++++++------- .../src/fpr/migrations/0044_remove_fits.py | 12 ++ 10 files changed, 63 insertions(+), 189 deletions(-) delete mode 100755 src/MCPClient/lib/clientScripts/fits.py create mode 100644 src/dashboard/src/fpr/migrations/0044_remove_fits.py diff --git a/hack/Dockerfile b/hack/Dockerfile index 3625f501e..e02c1cef9 100644 --- a/hack/Dockerfile +++ b/hack/Dockerfile @@ -168,7 +168,6 @@ RUN set -ex \ clamav \ coreutils \ ffmpeg \ - fits \ g++ \ gcc \ gearman \ @@ -190,7 +189,6 @@ RUN set -ex \ md5deep \ mediaconch \ mediainfo \ - nailgun \ nfs-common \ openjdk-8-jre-headless \ p7zip-full \ diff --git a/hack/README.md b/hack/README.md index 72d2d3b1e..1c0063b10 100644 --- a/hack/README.md +++ b/hack/README.md @@ -64,7 +64,6 @@ am-archivematica-mcp-server-1 39.43MiB / 7.763GiB am-archivematica-storage-service-1 83.96MiB / 7.763GiB am-nginx-1 2.715MiB / 7.763GiB am-elasticsearch-1 900.2MiB / 7.763GiB -am-fits-1 71.09MiB / 7.763GiB am-gearmand-1 3.395MiB / 7.763GiB am-mysql-1 551.9MiB / 7.763GiB am-clamavd-1 570MiB / 7.763GiB @@ -312,7 +311,6 @@ echo workers | socat - tcp:127.0.0.1:62004,shut-none | grep "_v0.0" | awk '{prin | mysql | `tcp/3306` | `tcp/62001` | | elasticsearch | `tcp/9200` | `tcp/62002` | | gearman | `tcp/4730` | `tcp/62004` | -| fits | `tcp/2113` | `tcp/62005` | | clamavd | `tcp/3310` | `tcp/62006` | | nginx » archivematica-dashboard | `tcp/80` | `tcp/62080` | | nginx » archivematica-storage-service | `tcp/8000` | `tcp/62081` | diff --git a/hack/docker-compose.yml b/hack/docker-compose.yml index 9e4be90de..b123bb4d3 100644 --- a/hack/docker-compose.yml +++ b/hack/docker-compose.yml @@ -76,14 +76,6 @@ services: ports: - "127.0.0.1:62004:4730" - fits: - image: "artefactual/fits-ngserver:0.8.4" - user: ${USER_ID:-1000} - ports: - - "127.0.0.1:62005:2113" - volumes: - - "archivematica_pipeline_data:/var/archivematica/sharedDirectory:rw" # Read and write needed! 
- clamavd: image: "artefactual/clamav:latest" environment: @@ -146,8 +138,6 @@ services: environment: DJANGO_SECRET_KEY: "12345" DJANGO_SETTINGS_MODULE: "settings.common" - NAILGUN_SERVER: "fits" - NAILGUN_PORT: "2113" ARCHIVEMATICA_MCPCLIENT_CLIENT_USER: "archivematica" ARCHIVEMATICA_MCPCLIENT_CLIENT_PASSWORD: "demo" ARCHIVEMATICA_MCPCLIENT_CLIENT_HOST: "mysql" @@ -169,7 +159,6 @@ services: - "../:/src" - "archivematica_pipeline_data:/var/archivematica/sharedDirectory:rw" links: - - "fits" - "clamavd" - "mysql" - "gearmand" diff --git a/src/MCPClient/lib/archivematicaClientModules b/src/MCPClient/lib/archivematicaClientModules index 28f259eab..4dff8ceab 100644 --- a/src/MCPClient/lib/archivematicaClientModules +++ b/src/MCPClient/lib/archivematicaClientModules @@ -26,7 +26,6 @@ removeunneededfiles_v0.0 = remove_unneeded_files archivematicaclamscan_v0.0 = archivematica_clamscan createevent_v0.0 = create_event examinecontents_v0.0 = examine_contents -fits_v0.0 = fits identifydspacefiles_v0.0 = identify_dspace_files identifydspacemetsfiles_v0.0 = identify_dspace_mets_files identifyfileformat_v0.0 = identify_file_format diff --git a/src/MCPClient/lib/clientScripts/characterize_file.py b/src/MCPClient/lib/clientScripts/characterize_file.py index f9c0cc239..dd875d5a2 100755 --- a/src/MCPClient/lib/clientScripts/characterize_file.py +++ b/src/MCPClient/lib/clientScripts/characterize_file.py @@ -5,7 +5,7 @@ # b) Prints the tool's stdout, for tools which do not output XML # # If a tool has no defined characterization commands, then the default -# will be run instead (currently FITS). +# will be run instead. import argparse import dataclasses import multiprocessing diff --git a/src/MCPClient/lib/clientScripts/fits.py b/src/MCPClient/lib/clientScripts/fits.py deleted file mode 100755 index 6012d0e96..000000000 --- a/src/MCPClient/lib/clientScripts/fits.py +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/env python -# This file is part of Archivematica. -# -# Copyright 2010-2013 Artefactual Systems Inc. -# -# Archivematica is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Archivematica is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Archivematica. If not, see . -import os -import tempfile - -import django -import lxml.etree as etree -from archivematicaFunctions import getTagged -from custom_handlers import get_script_logger -from databaseFunctions import insertIntoFPCommandOutput -from django.db import transaction -from executeOrRunSubProcess import executeOrRun - -# archivematicaCommon - -django.setup() -# dashboard -from main.models import FPCommandOutput - -logger = get_script_logger("archivematica.mcp.client.FITS") - -FITSNS = "{http://hul.harvard.edu/ois/xml/ns/fits/fits_output}" - - -def exclude_jhove_properties(fits): - """ - Exclude from "/fits/toolOutput/tool[name=Jhove]/repInfo" - because that field contains unnecessary excess data and the key data are - covered by output from other FITS tools. 
- """ - format_validation = None - tools = getTagged(getTagged(fits, FITSNS + "toolOutput")[0], FITSNS + "tool") - for tool in tools: - if tool.get("name") == "Jhove": - format_validation = tool - break - if format_validation is None: - return fits - repInfo = getTagged(format_validation, "repInfo")[0] - properties = getTagged(repInfo, "properties") - if len(properties): - repInfo.remove(properties[0]) - return fits - - -def main(target, xml_file, date, event_uuid, file_uuid, file_grpuse): - """ - Note: xml_file, date and event_uuid are not being used. - """ - if file_grpuse in ("DSPACEMETS", "maildirFile"): - logger.error("File's fileGrpUse in exclusion list, skipping") - return 0 - - if not FPCommandOutput.objects.filter(file=file_uuid).exists(): - logger.error("Warning: Fits has already run on this file. Not running again.") - return 0 - - _, temp_file = tempfile.mkstemp() - args = ["fits.sh", "-i", target, "-o", temp_file] - try: - logger.info("Executing %s", args) - retcode, stdout, stderr = executeOrRun( - "command", args, printing=False, capture_output=True - ) - - if retcode != 0: - logger.error( - "fits.sh exited with status code %s, %s, %s", retcode, stdout, stderr - ) - return retcode - - try: - tree = etree.parse(temp_file) - except Exception: - logger.exception("Failed to read Fits's XML.") - return 2 - - fits = tree.getroot() - fits = exclude_jhove_properties(fits) - - # NOTE: This is hardcoded for now because FPCommandOutput references FPRule for future development, - # when characterization will become user-configurable and be decoupled from FITS specifically. - # Thus a stub rule must exist for FITS; this will be replaced with a real rule in the future. - logger.info("Storing output of file characterization...") - insertIntoFPCommandOutput( - file_uuid, - etree.tostring(fits, pretty_print=False, encoding="utf8"), - "3a19de70-0e42-4145-976b-3a248d43b462", - ) - - except (OSError, ValueError): - logger.exception("Execution failed") - return 1 - - finally: - # We are responsible for removing the temporary file and we do it here - # to ensure that it's going to happen whatever occurs inside our try - # block. 
- os.remove(temp_file) - - return 0 - - -def call(jobs): - with transaction.atomic(): - for job in jobs: - with job.JobContext(logger=logger): - args = job.args[1:] - job.set_status(main(*args)) diff --git a/src/MCPClient/lib/clientScripts/manual_normalization_create_metadata_and_restructure.py b/src/MCPClient/lib/clientScripts/manual_normalization_create_metadata_and_restructure.py index bbfd3e6d2..c0d711820 100755 --- a/src/MCPClient/lib/clientScripts/manual_normalization_create_metadata_and_restructure.py +++ b/src/MCPClient/lib/clientScripts/manual_normalization_create_metadata_and_restructure.py @@ -120,7 +120,12 @@ def main(job): # We found the original file somewhere above job.print_output( "Matched original file %s (%s) to preservation file %s (%s)" - % (original_file.currentlocation, original_file.uuid, filePath, fileUUID) + % ( + original_file.currentlocation.decode(), + original_file.uuid, + filePath, + fileUUID, + ) ) # Generate the new preservation path: path/to/original/filename-uuid.ext basename = os.path.basename(filePath) diff --git a/src/MCPClient/lib/clientScripts/manual_normalization_move_access_files_to_dip.py b/src/MCPClient/lib/clientScripts/manual_normalization_move_access_files_to_dip.py index cbecf6fd7..2b0345b88 100755 --- a/src/MCPClient/lib/clientScripts/manual_normalization_move_access_files_to_dip.py +++ b/src/MCPClient/lib/clientScripts/manual_normalization_move_access_files_to_dip.py @@ -106,7 +106,7 @@ def main(job): } f = File.objects.get(**kwargs) else: - if isinstance(e, File.DoesNotExist, ValidationError): + if isinstance(e, (File.DoesNotExist, ValidationError)): job.print_error( "No matching file for: ", opts.filePath.replace(opts.SIPDirectory, "%SIPDirectory%", 1), diff --git a/src/MCPServer/lib/assets/workflow.json b/src/MCPServer/lib/assets/workflow.json index 9a917cea5..a1d72cf02 100644 --- a/src/MCPServer/lib/assets/workflow.json +++ b/src/MCPServer/lib/assets/workflow.json @@ -1927,15 +1927,15 @@ "config": { "@manager": "linkTaskManagerFiles", "@model": "StandardTaskConfig", - "arguments": "\"%relativeLocation%\" \"%SIPLogsDirectory%fileMeta/%fileUUID%.xml\" \"%date%\" \"%taskUUID%\" \"%fileUUID%\" \"%fileGrpUse%\"", - "execute": "FITS_v0.0", + "arguments": "\"%fileUUID%\" \"%SIPUUID%\"", + "execute": "characterizeFile_v0.0", "filter_subdir": "objects/manualNormalization/preservation" }, "description": { - "en": "Run FITS on manual normalized preservation files", - "no": "Kjør FITS på manuelt normaliserte bevaringsfiler", - "pt_BR": "Executar o FITS em arquivos de preservação normalizados manuaimente", - "sv": "Kör FITS på manuellt normaliserade bevarandefiler" + "en": "Characterize and extract metadata on manual normalized preservation files", + "no": "Karakteriser og hent ut metadata på manuelt normaliserte bevaringsfiler", + "pt_BR": "Caracterizar e extrair metadados em arquivos de preservação normalizados manuaimente", + "sv": "Karaktärisera och extrahera metadata på manuellt normaliserade bevarandefiler" }, "exit_codes": { "0": { @@ -1948,7 +1948,7 @@ "group": { "en": "Process manually normalized files", "es": "Procesar manualmente ficheros normalizados", - "no": "Prosesser normaliserte filer manuelt", + "no": "Prosesser manuelt normaliserte filer", "pt_BR": "Processar arquivos normalizados manualmente", "sv": "Bearbeta manuellt normaliserade filer" } @@ -2934,11 +2934,11 @@ "exit_codes": { "0": { "job_status": "Completed successfully", - "link_id": "bd382151-afd0-41bf-bb7a-b39aef728a32" + "link_id": "1b1a4565-b501-407b-b40f-2f20889423f1" } 
}, "fallback_job_status": "Failed", - "fallback_link_id": "bd382151-afd0-41bf-bb7a-b39aef728a32", + "fallback_link_id": "1b1a4565-b501-407b-b40f-2f20889423f1", "group": { "en": "Extract packages", "es": "Extraer paquetes", @@ -7667,7 +7667,7 @@ } }, "fallback_job_status": "Failed", - "fallback_link_id": "bd382151-afd0-41bf-bb7a-b39aef728a32", + "fallback_link_id": "1b1a4565-b501-407b-b40f-2f20889423f1", "group": { "en": "Extract packages", "es": "Extraer paquetes", @@ -9044,41 +9044,6 @@ "sv": "Normalisera" } }, - "bd382151-afd0-41bf-bb7a-b39aef728a32": { - "config": { - "@manager": "linkTaskManagerFiles", - "@model": "StandardTaskConfig", - "arguments": "\"%relativeLocation%\" \"%SIPLogsDirectory%fileMeta/%fileUUID%.xml\" \"%date%\" \"%taskUUID%\" \"%fileUUID%\" \"%fileGrpUse%\"", - "execute": "FITS_v0.0", - "filter_subdir": "objects/attachments" - }, - "description": { - "en": "Characterize and extract metadata for attachments", - "es": "Caracterizar y extraer los metadatos de los adjuntos", - "fr": "Caractériser et extraire les métadonnées pour mettre en pièces jointes", - "ja": "添付ファイルのメタデータの特徴付けと抽出", - "no": "Karakteriser og hent ut metadata fra vedlegg", - "pt_BR": "Caracterizar e extrair metadados para anexos", - "sv": "Karaktärisera och extrahera metadata för bilagor" - }, - "exit_codes": { - "0": { - "job_status": "Completed successfully", - "link_id": "1b1a4565-b501-407b-b40f-2f20889423f1" - } - }, - "fallback_job_status": "Failed", - "fallback_link_id": "61c316a6-0a50-4f65-8767-1f44b1eeb6dd", - "group": { - "en": "Characterize and extract metadata", - "es": "Caracterizar y extraer metadatos", - "fr": "Caractériser et extraire les métadonnées", - "ja": "メタデータの特徴付けと抽出", - "no": "Karakteriser og hent ut metadata", - "pt_BR": "Caracterizar e extrair metadados", - "sv": "Karaktärisera och extrahera metadata" - } - }, "bd792750-a55b-42e9-903a-8c898bb77df1": { "config": { "@manager": "linkTaskManagerDirectories", @@ -9276,6 +9241,38 @@ "sv": "Byt namn på SIP-mappen med SIP UUID" } }, + "bf0ea0f6-211b-4b34-8f25-8a68145403c8": { + "config": { + "@manager": "linkTaskManagerFiles", + "@model": "StandardTaskConfig", + "arguments": "\"True\" \"%relativeLocation%\" \"%fileUUID%\" --disable-reidentify", + "execute": "identifyFileFormat_v0.0", + "filter_subdir": "objects/manualNormalization/preservation" + }, + "description": { + "en": "Identify file format", + "es": "Identificar formato de fichero", + "fr": "Identifier le format de fichier", + "no": "Identifiser filformat", + "pt_BR": "Identifique o formato do arquivo", + "sv": "Identifiera filformat" + }, + "exit_codes": { + "0": { + "job_status": "Completed successfully", + "link_id": "10c40e41-fb10-48b5-9d01-336cd958afe8" + } + }, + "fallback_job_status": "Failed", + "fallback_link_id": "10c40e41-fb10-48b5-9d01-336cd958afe8", + "group": { + "en": "Process manually normalized files", + "es": "Procesar manualmente ficheros normalizados", + "no": "Prosesser manuelt normaliserte filer", + "pt_BR": "Processar arquivos normalizados manualmente", + "sv": "Bearbeta manuellt normaliserade filer" + } + }, "c103b2fb-9a6b-4b68-8112-b70597a6cd14": { "config": { "@manager": "linkTaskManagerDirectories", @@ -10760,7 +10757,7 @@ "exit_codes": { "0": { "job_status": "Completed successfully", - "link_id": "10c40e41-fb10-48b5-9d01-336cd958afe8" + "link_id": "bf0ea0f6-211b-4b34-8f25-8a68145403c8" } }, "fallback_job_status": "Failed", diff --git a/src/dashboard/src/fpr/migrations/0044_remove_fits.py b/src/dashboard/src/fpr/migrations/0044_remove_fits.py new 
file mode 100644 index 000000000..85f53d009 --- /dev/null +++ b/src/dashboard/src/fpr/migrations/0044_remove_fits.py @@ -0,0 +1,12 @@ +from django.db import migrations + + +def data_migration(apps, schema_editor): + FPTool = apps.get_model("fpr", "FPTool") + FPTool.objects.filter(description="FITS").delete() + + +class Migration(migrations.Migration): + dependencies = [("fpr", "0043_update_default_thumbnail_command")] + + operations = [migrations.RunPython(data_migration, migrations.RunPython.noop)] From a78868e0725c3d41704a24765a6aabeffd7b4c24 Mon Sep 17 00:00:00 2001 From: Douglas Cerna Date: Thu, 12 Sep 2024 11:38:08 -0600 Subject: [PATCH 5/8] Escape output and error in launchSubProcess --- pyproject.toml | 2 + .../lib/executeOrRunSubProcess.py | 74 +++++++++++++------ .../test_execute_functions.py | 30 +++++++- 3 files changed, 79 insertions(+), 27 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 86d11bc48..37e416103 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,7 @@ warn_unused_configs = true [[tool.mypy.overrides]] module = [ + "src.archivematicaCommon.lib.executeOrRunSubProcess", "src.MCPClient.lib.client.*", "src.MCPClient.lib.clientScripts.characterize_file", "src.MCPClient.lib.clientScripts.has_packages", @@ -64,6 +65,7 @@ module = [ "src.MCPClient.lib.clientScripts.policy_check", "src.MCPClient.lib.clientScripts.transcribe_file", "src.MCPClient.lib.clientScripts.validate_file", + "tests.archivematicaCommon.test_execute_functions", "tests.dashboard.fpr.test_views", "tests.MCPClient.conftest", "tests.MCPClient.test_characterize_file", diff --git a/src/archivematicaCommon/lib/executeOrRunSubProcess.py b/src/archivematicaCommon/lib/executeOrRunSubProcess.py index 86a377890..0b8ada6cf 100644 --- a/src/archivematicaCommon/lib/executeOrRunSubProcess.py +++ b/src/archivematicaCommon/lib/executeOrRunSubProcess.py @@ -21,16 +21,29 @@ import subprocess import sys import tempfile +from typing import Dict +from typing import List +from typing import Optional +from typing import Tuple +from typing import Union + +from archivematicaFunctions import escape + +Arguments = List[str] +Input = Union[str, bytes, io.IOBase] +Environment = Dict[str, str] +Command = Union[str, List[str]] +Result = Tuple[int, str, str] def launchSubProcess( - command, - stdIn="", - printing=True, - arguments=None, - env_updates=None, - capture_output=False, -): + command: Command, + stdIn: Input = "", + printing: bool = True, + arguments: Optional[Arguments] = None, + env_updates: Optional[Environment] = None, + capture_output: bool = False, +) -> Result: """ Launches a subprocess using ``command``, where ``command`` is either: a) a single string containing a commandline statement, or @@ -89,7 +102,7 @@ def launchSubProcess( stdin_pipe = subprocess.PIPE communicate_input = stdIn elif isinstance(stdIn, io.IOBase): - stdin_pipe = stdIn + stdin_pipe = stdIn.fileno() communicate_input = None else: raise Exception("stdIn must be a string or a file object") @@ -103,8 +116,8 @@ def launchSubProcess( env=my_env, ) std_out, std_error = p.communicate(input=communicate_input) - stdOut = std_out.decode() - stdError = std_error.decode() + stdOut = escape(std_out) + stdError = escape(std_error) else: # Ignore the stdout of the subprocess, capturing only stderr with open(os.devnull, "w") as devnull: @@ -116,7 +129,7 @@ def launchSubProcess( stderr=subprocess.PIPE, ) __, std_error = p.communicate(input=communicate_input) - stdError = std_error.decode() + stdError = escape(std_error) retcode = p.returncode 
# If we are not capturing output and the subprocess has succeeded, set # its stderr to the empty string. @@ -139,8 +152,13 @@ def launchSubProcess( def createAndRunScript( - text, stdIn="", printing=True, arguments=None, env_updates=None, capture_output=True -): + text: Command, + stdIn: Input = "", + printing: bool = True, + arguments: Optional[Arguments] = None, + env_updates: Optional[Environment] = None, + capture_output: bool = True, +) -> Result: if arguments is None: arguments = [] if env_updates is None: @@ -150,7 +168,10 @@ def createAndRunScript( encoding="utf-8", mode="wt", delete=False ) as tmpfile: os.chmod(tmpfile.name, 0o770) - tmpfile.write(text) + if isinstance(text, str): + tmpfile.write(text) + else: + tmpfile.write(" ".join(text)) tmpfile.close() cmd = [tmpfile.name] cmd.extend(arguments) @@ -168,14 +189,14 @@ def createAndRunScript( def executeOrRun( - type, - text, - stdIn="", - printing=True, - arguments=None, - env_updates=None, - capture_output=True, -): + type: str, + text: Command, + stdIn: Input = "", + printing: bool = True, + arguments: Optional[Arguments] = None, + env_updates: Optional[Environment] = None, + capture_output: bool = True, +) -> Result: """ Attempts to run the provided command on the shell, with the text of "stdIn" passed as standard input if provided. The type parameter @@ -220,7 +241,9 @@ def executeOrRun( capture_output=capture_output, ) if type == "bashScript": - text = "#!/bin/bash\n" + text + if not isinstance(text, str): + raise ValueError("command must be a str") + text = f"#!/bin/bash\n{text}" return createAndRunScript( text, stdIn=stdIn, @@ -230,7 +253,9 @@ def executeOrRun( capture_output=capture_output, ) if type == "pythonScript": - text = "#!/usr/bin/env python\n" + text + if not isinstance(text, str): + raise ValueError("command must be a str") + text = f"#!/usr/bin/env python\n{text}" return createAndRunScript( text, stdIn=stdIn, @@ -248,3 +273,4 @@ def executeOrRun( env_updates=env_updates, capture_output=capture_output, ) + raise ValueError(f"unknown type {type}") diff --git a/tests/archivematicaCommon/test_execute_functions.py b/tests/archivematicaCommon/test_execute_functions.py index 0effb2719..0f1fa41b9 100644 --- a/tests/archivematicaCommon/test_execute_functions.py +++ b/tests/archivematicaCommon/test_execute_functions.py @@ -1,13 +1,16 @@ +import pathlib import shlex import tempfile +from typing import Generator from unittest.mock import ANY +from unittest.mock import Mock from unittest.mock import patch import executeOrRunSubProcess as execsub import pytest -def test_capture_output(): +def test_capture_output() -> None: """Tests behaviour of capture_output when executing sub processes.""" # Test that stdout and stderr are not captured by default @@ -61,7 +64,7 @@ def test_capture_output(): @pytest.fixture -def temp_path(tmp_path): +def temp_path(tmp_path: pathlib.Path) -> Generator[str, None, None]: """Creates custom temp path, yields the value, and resets to original value.""" original_tempdir = tempfile.tempdir @@ -73,7 +76,9 @@ def temp_path(tmp_path): @patch("executeOrRunSubProcess.launchSubProcess") -def test_createAndRunScript_creates_tmpfile_in_custom_dir(launchSubProcess, temp_path): +def test_createAndRunScript_creates_tmpfile_in_custom_dir( + launchSubProcess: Mock, temp_path: str +) -> None: """Tests execution of launchSubProcess when executing createAndRunScript.""" script_content = "#!/bin/bash\necho 'Script output'\nexit 0" @@ -89,3 +94,22 @@ def 
test_createAndRunScript_creates_tmpfile_in_custom_dir(launchSubProcess, temp
     )
     args, _ = launchSubProcess.call_args
     assert args[0][0].startswith(temp_path)
+
+
+@patch("subprocess.Popen")
+def test_launchSubProcess_replaces_non_utf8_output_with_replacement_characters(
+    popen: Mock,
+) -> None:
+    communicate_return_code = 0
+    communicate_output = b"Output \xae"
+    communicate_error = b"Error \xae"
+    popen.return_value = Mock(
+        returncode=communicate_return_code,
+        **{"communicate.return_value": (communicate_output, communicate_error)},
+    )
+
+    code, stdout, stderr = execsub.launchSubProcess("mycommand", capture_output=True)
+
+    assert code == communicate_return_code
+    assert stdout == communicate_output.decode(errors="replace")
+    assert stderr == communicate_error.decode(errors="replace")

From 36738c9a0703cb5bc71dff6311c6191efda4c20f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jes=C3=BAs=20Garc=C3=ADa=20Crespo?=
Date: Fri, 30 Aug 2024 05:57:26 +0000
Subject: [PATCH 6/8] Add missing settings fixture to normalize test

This change ensures that the test does not attempt to write to
`/var/archivematica` but to a temporary directory created by the pytest
fixture.
---
 tests/MCPClient/test_normalize.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/MCPClient/test_normalize.py b/tests/MCPClient/test_normalize.py
index 6da999e71..025a34d5a 100644
--- a/tests/MCPClient/test_normalize.py
+++ b/tests/MCPClient/test_normalize.py
@@ -692,6 +692,7 @@ def test_normalization_fallbacks_to_default_thumbnail_rule_if_initial_command_fa
     fprule_thumbnail: fprmodels.FPRule,
     fpcommand_thumbnail: fprmodels.FPCommand,
     fprule_default_thumbnail: fprmodels.FPRule,
+    settings: pytest_django.fixtures.SettingsWrapper,
 ) -> None:
     expected_thumbnail_content = b"thumbnail image content"
     expected_thumbnail_path = (

From c353a38b6f5a5f47071d0eb6357e58be34bcc8c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jes=C3=BAs=20Garc=C3=ADa=20Crespo?=
Date: Fri, 30 Aug 2024 06:35:40 +0000
Subject: [PATCH 7/8] Fix integrity errors in reingest tests

This commit fixes some integrity errors in test_reingest_mets when
loading fixtures with invalid foreign keys. It is unclear why these
errors are not reproducible in the Docker environment.

FAILED tests/test_reingest_mets.py::TestAddingNewFiles::test_add_metadata_csv - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '1' has an invalid foreign key: Events_agents.agent_id contains a value '1' that does not have a corresponding value in Agents.pk.
FAILED tests/test_reingest_mets.py::TestAddingNewFiles::test_no_new_files - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '1' has an invalid foreign key: Events_agents.agent_id contains a value '1' that does not have a corresponding value in Agents.pk.
FAILED tests/test_reingest_mets.py::TestAddingNewFiles::test_new_metadata_file_in_subdir - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '1' has an invalid foreign key: Events_agents.agent_id contains a value '1' that does not have a corresponding value in Agents.pk.
FAILED tests/test_reingest_mets.py::TestAddingNewFiles::test_new_preservation_file - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '1' has an invalid foreign key: Events_agents.agent_id contains a value '1' that does not have a corresponding value in Agents.pk.
FAILED tests/test_reingest_mets.py::TestUpdateObject::test_update_file_id - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '2' has an invalid foreign key: Events_agents.agent_id contains a value '3' that does not have a corresponding value in Agents.pk. FAILED tests/test_reingest_mets.py::TestUpdateObject::test_update_all - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '2' has an invalid foreign key: Events_agents.agent_id contains a value '3' that does not have a corresponding value in Agents.pk. FAILED tests/test_reingest_mets.py::TestUpdateObject::test_update_checksum_type - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '2' has an invalid foreign key: Events_agents.agent_id contains a value '3' that does not have a corresponding value in Agents.pk. FAILED tests/test_reingest_mets.py::TestUpdateObject::test_update_preservation_derivative - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '2' has an invalid foreign key: Events_agents.agent_id contains a value '3' that does not have a corresponding value in Agents.pk. FAILED tests/test_reingest_mets.py::TestUpdateObject::test_object_not_updated - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '2' has an invalid foreign key: Events_agents.agent_id contains a value '3' that does not have a corresponding value in Agents.pk. FAILED tests/test_reingest_mets.py::TestUpdateObject::test_update_characterization - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '2' has an invalid foreign key: Events_agents.agent_id contains a value '3' that does not have a corresponding value in Agents.pk. FAILED tests/test_reingest_mets.py::TestUpdateObject::test__update_premis_object - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '2' has an invalid foreign key: Events_agents.agent_id contains a value '3' that does not have a corresponding value in Agents.pk. FAILED tests/test_reingest_mets.py::TestDeleteFiles::test_delete_file - django.db.utils.IntegrityError: The row in table 'Events_agents' with primary key '2' has an invalid foreign key: Events_agents.agent_id contains a value '3' that does not have a corresponding value in Agents.pk. 
--- tests/MCPClient/fixtures/reingest-checksum.json | 2 +- tests/MCPClient/fixtures/reingest-file-id.json | 2 +- .../MCPClient/fixtures/reingest-preservation.json | 8 ++++---- tests/MCPClient/test_reingest_mets.py | 14 ++++++++++++-- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/tests/MCPClient/fixtures/reingest-checksum.json b/tests/MCPClient/fixtures/reingest-checksum.json index 7ed18d3ea..b275e842c 100644 --- a/tests/MCPClient/fixtures/reingest-checksum.json +++ b/tests/MCPClient/fixtures/reingest-checksum.json @@ -12,7 +12,7 @@ "fields": { "event_type": "message digest calculation", "event_id": "5a561b24-cc7e-4032-b005-f75de2ec558a", - "agents": [1,2], + "agents": [2,3], "event_detail": "program=\"python\"; module=\"hashlib.md5()\"", "file_uuid": "ae8d4290-fe52-4954-b72a-0f591bee2e2f", "event_outcome_detail": "ac63a92ba5a94c337e740d6f189200d0", diff --git a/tests/MCPClient/fixtures/reingest-file-id.json b/tests/MCPClient/fixtures/reingest-file-id.json index 4acb49352..98181bf19 100644 --- a/tests/MCPClient/fixtures/reingest-file-id.json +++ b/tests/MCPClient/fixtures/reingest-file-id.json @@ -5,7 +5,7 @@ "fields": { "event_type": "format identification", "event_id": "44455720-d9a7-43a4-90b6-a9cdf6e5c8cc", - "agents": [1,2,3], + "agents": [2,3], "event_detail": "program=\"Fido\"; version=\"1.2\"", "file_uuid": "ae8d4290-fe52-4954-b72a-0f591bee2e2f", "event_outcome_detail": "fmt/9000", diff --git a/tests/MCPClient/fixtures/reingest-preservation.json b/tests/MCPClient/fixtures/reingest-preservation.json index a9073ee37..aec5c244c 100644 --- a/tests/MCPClient/fixtures/reingest-preservation.json +++ b/tests/MCPClient/fixtures/reingest-preservation.json @@ -26,7 +26,7 @@ "fields": { "event_type": "normalization", "event_id": "291f9be4-d19a-4bcc-8e1c-d3f01e4a48b1", - "agents": [1,2,3], + "agents": [2,3], "event_detail": "ArchivematicaFPRCommandID=\"a34ddc9b-c922-4bb6-8037-bbe713332175\"; program=\"convert\"; version=\"Version: ImageMagick 6.7.7-10 2014-03-06 Q16 http://www.imagemagick.org\"\n", "file_uuid": "ae8d4290-fe52-4954-b72a-0f591bee2e2f", "event_outcome_detail": "%SIPDirectory%objects/evelyn_s_photo-d8cc7af7-284a-42f5-b7f4-e181a0efc35f.tif", @@ -59,7 +59,7 @@ "fields": { "event_type": "creation", "event_id": "a89e6b45-1ac0-49cc-9dda-a4d11ed63f2f", - "agents": [1,2,3], + "agents": [2,3], "event_detail": "", "file_uuid": "d8cc7af7-284a-42f5-b7f4-e181a0efc35f", "event_outcome_detail": "", @@ -73,7 +73,7 @@ "fields": { "event_type": "message digest calculation", "event_id": "5c505f21-4e9a-49aa-b7dd-ed699fd4f8ef", - "agents": [1,2,3], + "agents": [2,3], "event_detail": "program=\"python\"; module=\"hashlib.sha256()\"", "file_uuid": "d8cc7af7-284a-42f5-b7f4-e181a0efc35f", "event_outcome_detail": "d82448f154b9185bc777ecb0a3602760eb76ba85dd3098f073b2c91a03f571e9", @@ -87,7 +87,7 @@ "fields": { "event_type": "fixity check", "event_id": "94ca2dee-b136-4a74-b477-a0b938bb49e9", - "agents": [1,2,3], + "agents": [2,3], "event_detail": "program=\"python\"; module=\"hashlib.sha256()\"", "file_uuid": "d8cc7af7-284a-42f5-b7f4-e181a0efc35f", "event_outcome_detail": "d82448f154b9185bc777ecb0a3602760eb76ba85dd3098f073b2c91a03f571e9 verified", diff --git a/tests/MCPClient/test_reingest_mets.py b/tests/MCPClient/test_reingest_mets.py index 31280eb8f..b3a4d99fa 100644 --- a/tests/MCPClient/test_reingest_mets.py +++ b/tests/MCPClient/test_reingest_mets.py @@ -28,7 +28,12 @@ class TestUpdateObject(TestCase): """Test updating the PREMIS:OBJECT in the techMD. 
(update_object).""" - fixture_files = ["sip-reingest.json", "files.json", "events-reingest.json"] + fixture_files = [ + "agents.json", + "sip-reingest.json", + "files.json", + "events-reingest.json", + ] fixtures = [os.path.join(FIXTURES_DIR, p) for p in fixture_files] def setUp(self): @@ -1839,7 +1844,12 @@ def test_new_preservation_file(self): class TestDeleteFiles(TestCase): """Test marking files in the METS as deleted. (delete_files)""" - fixture_files = ["sip-reingest.json", "files.json", "events-reingest.json"] + fixture_files = [ + "agents.json", + "sip-reingest.json", + "files.json", + "events-reingest.json", + ] fixtures = [os.path.join(FIXTURES_DIR, p) for p in fixture_files] sip_uuid = "4060ee97-9c3f-4822-afaf-ebdf838284c3" From d6b3869ca742c6be955e4bd34f640d59949941b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs=20Garc=C3=ADa=20Crespo?= Date: Fri, 30 Aug 2024 07:04:29 +0000 Subject: [PATCH 8/8] Update bagit-python Use the dev/am-117 branch of the bagit-python fork which includes a fix for environments where pkg_resources is not available, i.e. Python 3.12 where setuptools is not bundled, even though we're currently listing setuptools as a dependency of zope (used by gevent). --- requirements-dev.txt | 2 +- requirements.in | 2 +- requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 3e3453b17..ed2d77877 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -19,7 +19,7 @@ attrs==24.2.0 # -r requirements.txt # jsonschema # referencing -bagit @ git+https://github.com/artefactual-labs/bagit-python.git@4b8fde73b4e631461bfd7add87e200500d40ca21 +bagit @ git+https://github.com/artefactual-labs/bagit-python.git@902051d8410219f6c5f4ce6d43e5b272cf29e89b # via -r requirements.txt brotli==1.1.0 # via -r requirements.txt diff --git a/requirements.in b/requirements.in index 7b5744aaf..4393cd65f 100644 --- a/requirements.in +++ b/requirements.in @@ -3,7 +3,7 @@ Django>=4.2,<5 agentarchives amclient ammcpc -git+https://github.com/artefactual-labs/bagit-python.git@4b8fde73b4e631461bfd7add87e200500d40ca21#egg=bagit +git+https://github.com/artefactual-labs/bagit-python.git@902051d8410219f6c5f4ce6d43e5b272cf29e89b#egg=bagit brotli clamd django-autoslug diff --git a/requirements.txt b/requirements.txt index dbf479dc6..28c3e1fcc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,7 +16,7 @@ attrs==24.2.0 # via # jsonschema # referencing -bagit @ git+https://github.com/artefactual-labs/bagit-python.git@4b8fde73b4e631461bfd7add87e200500d40ca21 +bagit @ git+https://github.com/artefactual-labs/bagit-python.git@902051d8410219f6c5f4ce6d43e5b272cf29e89b # via -r requirements.in brotli==1.1.0 # via -r requirements.in