From f09aa68fdb3cc68c8bafe908adda57e82865836e Mon Sep 17 00:00:00 2001 From: Sergei Maertens Date: Mon, 30 Dec 2024 14:00:23 +0100 Subject: [PATCH 1/2] :art: [#4795] Use pattern matching for file validation edge cases This makes the cases that need to be handled a bit more readable rather than the contrived if/elif flows. --- src/openforms/formio/api/validators.py | 90 ++++++++++++++------------ 1 file changed, 50 insertions(+), 40 deletions(-) diff --git a/src/openforms/formio/api/validators.py b/src/openforms/formio/api/validators.py index af072a4d13..af1d042443 100644 --- a/src/openforms/formio/api/validators.py +++ b/src/openforms/formio/api/validators.py @@ -61,14 +61,26 @@ def __call__(self, value: UploadedFile) -> None: detected_mime_type = magic.from_buffer(head, mime=True) provided_mime_type = value.content_type or "application/octet-stream" - # gh #2520 - # application/x-ole-storage on Arch with shared-mime-info 2.0+155+gf4e7cbc-1 - if detected_mime_type in ["application/CDFV2", "application/x-ole-storage"]: - whole_file = head + value.read() - detected_mime_type = magic.from_buffer(whole_file, mime=True) + # bail early if no extension was provided + if not ext: + raise serializers.ValidationError( + _( + "Could not determine the file type. Please make sure the file name " + "has an extension." + ) + ) - if detected_mime_type == "image/heif": - detected_mime_type = "image/heic" + # remap detected mime types in some cases + match detected_mime_type: + # gh #2520 + # application/x-ole-storage on Arch with shared-mime-info 2.0+155+gf4e7cbc-1 + case "application/CDFV2" | "application/x-ole-storage": + whole_file = head + value.read() + detected_mime_type = magic.from_buffer(whole_file, mime=True) + # gh #2911 - see commit 8d59d2d95b140ec525759ae089c63277b7f64610 + # Note that the ``uncompress=True`` option *might* help. 
+ case "image/heif": + detected_mime_type = "image/heic" if not ( self.any_allowed @@ -80,14 +92,6 @@ def __call__(self, value: UploadedFile) -> None: _("The provided file is not a valid file type.") ) - if not ext: - raise serializers.ValidationError( - _( - "Could not determine the file type. Please make sure the file name " - "has an extension." - ) - ) - # Contents is allowed. Do extension or submitted content_type agree? if provided_mime_type == "application/octet-stream": m = magic.Magic(extension=True) # pyright: ignore[reportCallIssue] @@ -108,31 +112,37 @@ def __call__(self, value: UploadedFile) -> None: raise serializers.ValidationError( _("The provided file is not a {file_type}.").format(file_type=f".{ext}") ) - # gh #4886 - # We need to accept text/plain as a valid MIME type for CSV files. - # If the file does not strictly follow the conventions of CSV (e.g. non-standard delimiters), - # may not be considered as a valid CSV. - elif ( - provided_mime_type == "text/csv" - and detected_mime_type == "text/plain" - and ext == "csv" - ): - return - elif detected_mime_type == "image/heic" and provided_mime_type in ( - "image/heic", - "image/heif", - ): - return - # gh #4658 - # Windows use application/x-zip-compressed as a mimetype for .zip files, which - # is deprecated but still we need to support it. Instead, the common case for - # zip files is application/zip or application/zip-compressed mimetype. - elif detected_mime_type == "application/zip" and provided_mime_type in ( - "application/zip-compressed", - "application/x-zip-compressed", - ): - return - elif provided_mime_type != detected_mime_type: + + # Handle edge cases where detection is not exact/reliable. + match (provided_mime_type, detected_mime_type, ext): + # gh #4886 + # We need to accept text/plain as a valid MIME type for CSV files. + # If the file does not strictly follow the conventions of CSV (e.g. + # non-standard delimiters), + # may not be considered as a valid CSV. 
+ case ("text/csv", "text/plain", "csv"): + return + + # See earlier - heic/heif can be considered equivalent + case ("image/heic" | "image/heif", "image/heic", _): + return + + # gh #4658 + # Windows use application/x-zip-compressed as a mimetype for .zip files, + # which is deprecated but still we need to support it. Instead, the common + # case for zip files is application/zip or application/zip-compressed + # mimetype. + case ( + "application/zip-compressed" | "application/x-zip-compressed", + "application/zip", + "zip", + ): + return + + case _: + pass + + if provided_mime_type != detected_mime_type: raise serializers.ValidationError( _("The provided file is not a {file_type}.").format( filename=value.name, file_type=f".{ext}" From 4574fe8789ae4221e28153be151f84abd03d324e Mon Sep 17 00:00:00 2001 From: Sergei Maertens Date: Mon, 30 Dec 2024 14:10:03 +0100 Subject: [PATCH 2/2] :alien: [#4795] Handle libmagic 5.46 behaviour Validated with Sonny who's also using Arch - since libmagic 5.46 the detected content type for these 'exotic' zip formats no longer reports application/zip, but instead it returns application/octet-stream, or otherwise said: it doesn't know those magic bytes (anymore). Given the earlier patches, all we can do is allow these files to go through. Our Docker images are based on Debian bookworm, which ships libmagic 5.44. Debian unstable currently still has 5.44. --- src/openforms/formio/api/validators.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/openforms/formio/api/validators.py b/src/openforms/formio/api/validators.py index af1d042443..6fc85492a5 100644 --- a/src/openforms/formio/api/validators.py +++ b/src/openforms/formio/api/validators.py @@ -132,9 +132,10 @@ def __call__(self, value: UploadedFile) -> None: # which is deprecated but still we need to support it. Instead, the common # case for zip files is application/zip or application/zip-compressed # mimetype. 
+ # libmagic 5.46+ detects these files as application/octet-stream instead. case ( "application/zip-compressed" | "application/x-zip-compressed", - "application/zip", + "application/zip" | "application/octet-stream", "zip", ): return