From d17b16f5ae55eaed7f10e76ed065aa1cea06bede Mon Sep 17 00:00:00 2001 From: robinvandermolen Date: Thu, 19 Dec 2024 14:09:10 +0100 Subject: [PATCH] :bug: [#4795] Invert validation for .msg files The SDK cannot reliably determine which content type belongs to a .msg file, most notably on Linux and MacOS because the extension is not in the mime type database. This manifests as a file being uploaded with empty content-type. To allow these files to go through, the serializer must allow empty values for the 'type' field which contains the detected content type, and the backend must perform additional processing to determine the file type. We can do this by falling back to the generic case of 'binary file' (application/octet-stream) content type, and let libmagic figure out which extensions belong to the magic bytes, i.e. we look at the magic bytes to figure out what kind of file was provided, and we check the provided file extensions against the list of valid extensions for the detected file type. Backport-of: #4961 --- .../conf/locale/nl/LC_MESSAGES/django.po | 2 +- src/openforms/formio/api/validators.py | 41 +++++++++++------- src/openforms/formio/components/vanilla.py | 9 +--- src/openforms/formio/tests/files/test.msg | Bin 0 -> 11264 bytes src/openforms/formio/tests/test_validators.py | 34 +++++++++++++++ .../formio/tests/validation/test_file.py | 4 +- .../tests/e2e/test_input_validation.py | 10 ++--- 7 files changed, 69 insertions(+), 31 deletions(-) create mode 100644 src/openforms/formio/tests/files/test.msg diff --git a/src/openforms/conf/locale/nl/LC_MESSAGES/django.po b/src/openforms/conf/locale/nl/LC_MESSAGES/django.po index ee945803f5..50aab26f15 100644 --- a/src/openforms/conf/locale/nl/LC_MESSAGES/django.po +++ b/src/openforms/conf/locale/nl/LC_MESSAGES/django.po @@ -4591,7 +4591,7 @@ msgid "" "extension." msgstr "" "Het bestandstype kon niet bepaald worden. Controleer of de bestandsnaam met " -"een extensie eindigt (bijvoorbeel '.pdf' of '.png')." +"een extensie eindigt (bijvoorbeeld '.pdf' of '.png')." #: openforms/formio/components/vanilla.py:332 #, python-brace-format diff --git a/src/openforms/formio/api/validators.py b/src/openforms/formio/api/validators.py index 34dcd5aa13..af072a4d13 100644 --- a/src/openforms/formio/api/validators.py +++ b/src/openforms/formio/api/validators.py @@ -1,4 +1,5 @@ import logging +from pathlib import Path from typing import Iterable from django.core.files.uploadedfile import UploadedFile @@ -56,29 +57,40 @@ def __init__(self, allowed_mime_types: Iterable[str] | None = None): def __call__(self, value: UploadedFile) -> None: head = value.read(2048) - ext = value.name.split(".")[-1] - mime_type = magic.from_buffer(head, mime=True) + ext = Path(value.name or "").suffix[1:] + detected_mime_type = magic.from_buffer(head, mime=True) + provided_mime_type = value.content_type or "application/octet-stream" # gh #2520 # application/x-ole-storage on Arch with shared-mime-info 2.0+155+gf4e7cbc-1 - if mime_type in ["application/CDFV2", "application/x-ole-storage"]: + if detected_mime_type in ["application/CDFV2", "application/x-ole-storage"]: whole_file = head + value.read() - mime_type = magic.from_buffer(whole_file, mime=True) + detected_mime_type = magic.from_buffer(whole_file, mime=True) - if mime_type == "image/heif": - mime_type = "image/heic" + if detected_mime_type == "image/heif": + detected_mime_type = "image/heic" if not ( self.any_allowed - or mimetype_allowed(mime_type, self._regular_mimes, self._wildcard_mimes) + or mimetype_allowed( + detected_mime_type, self._regular_mimes, self._wildcard_mimes + ) ): raise serializers.ValidationError( _("The provided file is not a valid file type.") ) + if not ext: + raise serializers.ValidationError( + _( + "Could not determine the file type. Please make sure the file name " + "has an extension." + ) + ) + # Contents is allowed. Do extension or submitted content_type agree? - if value.content_type == "application/octet-stream": - m = magic.Magic(extension=True) + if provided_mime_type == "application/octet-stream": + m = magic.Magic(extension=True) # pyright: ignore[reportCallIssue] extensions = m.from_buffer(head).split("/") # magic db doesn't know any more specific extension(s), so accept the # file @@ -101,27 +113,26 @@ def __call__(self, value: UploadedFile) -> None: # If the file does not strictly follow the conventions of CSV (e.g. non-standard delimiters), # may not be considered as a valid CSV. elif ( - value.content_type == "text/csv" - and mime_type == "text/plain" + provided_mime_type == "text/csv" + and detected_mime_type == "text/plain" and ext == "csv" ): return - elif mime_type == "image/heic" and value.content_type in ( + elif detected_mime_type == "image/heic" and provided_mime_type in ( "image/heic", "image/heif", ): return - # gh #4658 # Windows use application/x-zip-compressed as a mimetype for .zip files, which # is deprecated but still we need to support it. Instead, the common case for # zip files is application/zip or application/zip-compressed mimetype. - elif mime_type == "application/zip" and value.content_type in ( + elif detected_mime_type == "application/zip" and provided_mime_type in ( "application/zip-compressed", "application/x-zip-compressed", ): return - elif mime_type != value.content_type: + elif provided_mime_type != detected_mime_type: raise serializers.ValidationError( _("The provided file is not a {file_type}.").format( filename=value.name, file_type=f".{ext}" diff --git a/src/openforms/formio/components/vanilla.py b/src/openforms/formio/components/vanilla.py index 43d9e10be5..2d9dd1e9d4 100644 --- a/src/openforms/formio/components/vanilla.py +++ b/src/openforms/formio/components/vanilla.py @@ -311,14 +311,7 @@ class FileSerializer(serializers.Serializer): originalName = serializers.CharField(trim_whitespace=False) size = serializers.IntegerField(min_value=0) storage = serializers.ChoiceField(choices=["url"]) - type = serializers.CharField( - error_messages={ - "blank": _( - "Could not determine the file type. Please make sure the file name " - "has an extension." - ), - } - ) + type = serializers.CharField(required=True, allow_blank=True) url = serializers.URLField() data = FileDataSerializer() # type: ignore diff --git a/src/openforms/formio/tests/files/test.msg b/src/openforms/formio/tests/files/test.msg new file mode 100644 index 0000000000000000000000000000000000000000..b597796de62bb5b61b4584d863f83cb1cf44672c GIT binary patch literal 11264 zcmeHNO>7&-6`ti%f0XqX)pqR0Y6>Cd|&G%a($OuHp^yNiBL^?8k9P0fl`}FR|IVnZPLgFSezxwJM;1h7Sh@&4?Tt0 zt2FtqB2Fb^?l1{wmU_>zB|=+Z1!Y{>=@F)Ba=H)7!ix0zzaoNfnU+@r-)75fj-C#? z?D08EyrwA*QA|)-y8M-G`A3L<;P0bSq}wWM5Mcc)UR{{d+j!9nEkr|>Z4@Eb4vLsp ztZ__P7sYOh_fe!Rfb~&CT|cGh6ZxI+<&nVMT>tWXAT_8OUU^0n>p-3tXmmH$f4Q`) z0#_84<5yHQL>2nX zs`z`UEo>VEKm0*-AhFDT(Ft$z)6DQr?z{$aDK%>PIL`#T8#u$y6bBlyXGgbgd{AN(oU>#F?2 zo|pE&AHJ*o_4gmiyDGl)A7~i9{GU+NKgbh0rHYTWvWFsqKmREu`3L16d({sGx*z@| zaj&%h3Lgymq{=^ZzyA6IzS{m9lz-`4LIkzt4}8eK?*1cm8GE`a|L||??|;CD4_;UQ z1+J6Z2H{!^tt%t+3E5|KKaw;e37TjrY7NqM`N$P z^NZenKP|F9X+1OKPHAhW>DGOoZpJyC!6x~~jAhw0n_*z~3f+^V%w}U0O>uW_6K#2S z_Hpv2QK}MBImZ~~*_?BX4964j+Rsd^ zt9RDya;hc>b#$85CiPcNNf6A+w)ay1nq7bQdjC9?nT|hYIt{P?9x?#lrg-MrspU6g zFTSSJO^TH-T^>jd|0StUI>vWf#x-5LwGmkhy{doK)*o8y`3K8{&!236y6vsL!`S6mIUpVhX%r^x$&|54Zf(qO|$%fnY# zUS*t8%gfiomp9J}{+9;3680p<;C1Vp9H&~?s|f!1YWGi)cUAmjgbN=G!5?2rA?-G< z@}&D$274OzwJQFj)K-oE|Be6m2;svoK&ZukLTKNJivDfto+|%G3Adj7-Ftq_e_j0# zj1ON$Fz?GhMu7RRi?6|dIz&;Ge|3C(e^3{{DgLD#nn}U849)b@=(`ddX`_8K^WNRS zClf#fnac5uv9<&`1Wf*RHOAVv(K@@;gL)Zs_D?~}vM398Bk%r3*iUf4i-~$EFP-%aq5k$s^oTLGZL5#617dz$`9U7v6yta2??#xl zy!5envB8J1zq8|K$p4)w#XtPSe>RXx=rUN4Pik)~%qo g^ibbcFZB-Ryt}Tx5dGoY1x