diff --git a/src/matrix_content_scanner/config.py b/src/matrix_content_scanner/config.py index 3d9a401..c67e326 100644 --- a/src/matrix_content_scanner/config.py +++ b/src/matrix_content_scanner/config.py @@ -83,6 +83,7 @@ def _parse_size(size: Optional[Union[str, float]]) -> Optional[float]: "script": {"type": "string"}, "temp_directory": {"type": "string"}, "removal_command": {"type": "string"}, + "check_claimed_mimetype": {"type": "boolean"}, "allowed_mimetypes": {"type": "array", "items": {"type": "string"}}, }, }, @@ -139,6 +140,7 @@ class ScanConfig: script: str temp_directory: str removal_command: str = "rm" + check_claimed_mimetype: bool = True allowed_mimetypes: Optional[List[str]] = None diff --git a/src/matrix_content_scanner/scanner/scanner.py b/src/matrix_content_scanner/scanner/scanner.py index e9f1cf0..e5ec41a 100644 --- a/src/matrix_content_scanner/scanner/scanner.py +++ b/src/matrix_content_scanner/scanner/scanner.py @@ -82,6 +82,8 @@ def __init__(self, mcs: "MatrixContentScanner"): self._max_size_to_cache = mcs.config.result_cache.max_file_size + self._check_claimed_mimetype = mcs.config.scan.check_claimed_mimetype + # List of MIME types we should allow. If None, we don't fail files based on their # MIME types (besides comparing it with the Content-Type header from the server # for unencrypted files). @@ -526,7 +528,7 @@ def _check_mimetype( # Check if the MIME type is matching the one that's expected, but only if the file # is not encrypted (because otherwise we'll always have 'application/octet-stream' # in the Content-Type header regardless of the actual MIME type of the file). - if encrypted is False and detected_mimetype != claimed_mimetype: + if self._check_claimed_mimetype and encrypted is False and detected_mimetype != claimed_mimetype: logger.error( "Mismatching MIME type (%s) and Content-Type header (%s)", detected_mimetype,