diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index 5ba2fec4af..ee1f673fab 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -13,4 +13,5 @@ NOTKNOWN pyyaml skontar Svunknown -urllib \ No newline at end of file +urllib +aar diff --git a/README.md b/README.md index e98869107f..35918c4640 100644 --- a/README.md +++ b/README.md @@ -313,6 +313,7 @@ The following archive formats are currently supported by the auto-extractor: | Archive Format | File Extension | | -------------- | ---------------------------------------------- | | zip | .zip, .exe, .jar, .msi, .egg, .whl, .war, .ear | +| | .aar | | tar | .tar, .tgz, .tar.gz, .tar.xz, .tar.bz2 | | deb | .deb, .ipk | | rpm | .rpm | @@ -348,7 +349,7 @@ On windows systems, you may need: Windows has `Expand` installed by default, but `ar` and `7z` might need to be installed. If you want to run our test-suite or scan a zstd compressed file, We recommend installing this [7-zip-zstd](https://github.com/mcmilk/7-Zip-zstd) -fork of 7zip. We are currently using `7z` for extracting `jar`, `apk`, `msi`, `exe` and `rpm` files. +fork of 7zip. We are currently using `7z` for extracting `jar`, `apk`, `aar`, `msi`, `exe` and `rpm` files. To install `ar` you can install MinGW (which has binutils as a part of it) from [here](https://www.mingw-w64.org/downloads/#msys2) and run the downloaded .exe file. 
If you get an error about building libraries when you try to install from pip, diff --git a/cve_bin_tool/extractor.py b/cve_bin_tool/extractor.py index 8018b433e2..b3e9942318 100644 --- a/cve_bin_tool/extractor.py +++ b/cve_bin_tool/extractor.py @@ -90,6 +90,7 @@ def __init__(self, logger=None, error_mode=ErrorMode.TruncTrace): ".whl", ".war", ".ear", + ".aar", ], MIMES: [ "application/x-msdownload", diff --git a/cve_bin_tool/file.py b/cve_bin_tool/file.py index c609d0747f..595497c79f 100644 --- a/cve_bin_tool/file.py +++ b/cve_bin_tool/file.py @@ -47,3 +47,28 @@ def check_pe(_filename: str, signature: bytes) -> bool: def check_fake_test(_filename: str, signature: bytes) -> bool: """check for fake tests under windows.""" return signature == b"MZ\x90\x00" + + +def check_mach_o_32(_filename: str, signature: bytes) -> bool: + """Check for Mach-O 32-bit signature.""" + return signature[:4] == b"\xFE\xED\xFA\xCE" + + +def check_mach_o_64(_filename: str, signature: bytes) -> bool: + """Check for Mach-O 64-bit signature.""" + return signature[:4] == b"\xFE\xED\xFA\xCF" + + +def check_mach_o_universal(_filename: str, signature: bytes) -> bool: + """Check for Mach-O Universal Binary signature.""" + return signature[:4] == b"\xCA\xFE\xBA\xBE" + + +def check_ios_arm(_filename: str, signature: bytes) -> bool: + """Check for byte-swapped Mach-O 64-bit signature (iOS ARM).""" + return signature[:4] == b"\xCF\xFA\xED\xFE" + + +def check_wasm(_filename: str, signature: bytes) -> bool: + """Check for WebAssembly (WASM) signature.""" + return signature[:4] == b"\x00\x61\x73\x6D" diff --git a/cve_bin_tool/version_scanner.py b/cve_bin_tool/version_scanner.py index f889c6eb7f..0768b39d57 100644 --- a/cve_bin_tool/version_scanner.py +++ b/cve_bin_tool/version_scanner.py @@ -173,12 +173,17 @@ def is_executable(self, filename: str) -> tuple[bool, str | None]: "PE32 executable", "PE32+ executable", "Mach-O", + "WebAssembly", "YAFFS", ": data", *list(valid_files.keys()), ) ): - return False, None + #
Fall back to is_binary() because the `file` command does not cover every type. + # On Linux, Mach-O Universal Binaries were observed to go unrecognized. + if not is_binary(filename): + return False, None + # otherwise use python implementation of file elif not is_binary(filename): return False, None diff --git a/test/assets/single-byte.txt b/test/assets/single-byte.txt deleted file mode 100644 index 56a6051ca2..0000000000 --- a/test/assets/single-byte.txt +++ /dev/null @@ -1 +0,0 @@ -1 \ No newline at end of file diff --git a/test/assets/windows.txt b/test/assets/windows.txt deleted file mode 100644 index 7e6e0f1190..0000000000 --- a/test/assets/windows.txt +++ /dev/null @@ -1 +0,0 @@ -MZ \ No newline at end of file diff --git a/test/test_executable.py b/test/test_executable.py new file mode 100644 index 0000000000..22c07b11f4 --- /dev/null +++ b/test/test_executable.py @@ -0,0 +1,90 @@ +# Copyright (C) 2024 Iain Coulter +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +CVE-bin-tool version scanner file/is_binary tests +""" + +from os import remove +from tempfile import NamedTemporaryFile + +from cve_bin_tool.version_scanner import VersionScanner + + +class TestFile: + """Tests the CVE Bin Tool executable detection using the 'file' command.""" + + def _write_magic_signature(self, f, signature): + """Helper function to write a magic signature to a file.""" + f.write(signature) + f.seek(0) + + def _check_file_type(self, file_type, signature, expected_result): + """Helper function to check if a file is binary based on its type.""" + with NamedTemporaryFile("w+b", suffix=file_type, delete=False) as f: + self._write_magic_signature(f, signature) + scanner = VersionScanner() + result, *_ = scanner.is_executable(f.name) + assert ( + result == expected_result + ), f"Expected {expected_result}, but got {result}" + remove(f.name) + + def _check_test(self, type): + """Helper function to look up the signature for the given type + and check that is_executable returns the expected result""" + file_signatures = { + "elf":
(b"\x7f\x45\x4c\x46\x02\x01\x01\x03\n", True, ".out"), + "mach_o_32": (b"\xFE\xED\xFA\xCE\x00\x00\x00\x00", True, ".out"), + "mach_o_64": (b"\xFE\xED\xFA\xCF\x00\x00\x00\x00", True, ".out"), + "mach_o_universal": (b"\xCA\xFE\xBA\xBE\x00\x00\x00\x00", True, ".out"), + "ios_arm": (b"\xCF\xFA\xED\xFE\x00\x00\x00\x00", True, ".out"), + "wasm": (b"yoyo\x00\x61\x73\x6D\x01\x00\x00\x00", True, ".out"), + "c": (b"#include ", False, ".c"), + "single_byte": (b"1", False, ".txt"), + "windows": (b"MZ\x90\x00", True, ".dll"), + } + signature, expected_result, file_type = file_signatures.get( + type, (b"some other data\n", False, ".txt") + ) + self._check_file_type(file_type, signature, expected_result) + + def test_binary_elf_file(self): + """file *.out""" + self._check_test("elf") + + def test_binary_mach_o_32_file(self): + """file *.out""" + self._check_test("mach_o_32") + + def test_binary_mach_o_64_file(self): + """file *.out""" + self._check_test("mach_o_64") + + def test_binary_mach_o_universal_file(self): + """file *.out""" + self._check_test("mach_o_universal") + + def test_binary_ios_arm_file(self): + """file *.out""" + self._check_test("ios_arm") + + def test_binary_wasm_file(self): + """file *.out""" + self._check_test("wasm") + + def test_source_file(self): + """file *.c""" + self._check_test("c") + + def test_single_byte_file(self): + """file single-byte""" + self._check_test("single_byte") + + def test_windows(self): + """file *.dll""" + self._check_test("windows") + + def test_other_file(self): + """file *.txt""" + self._check_test("other") diff --git a/test/test_file.py b/test/test_file.py index c0dee7f2ca..1b54611070 100644 --- a/test/test_file.py +++ b/test/test_file.py @@ -4,51 +4,94 @@ """ CVE-bin-tool file tests """ -from pathlib import Path import pytest from cve_bin_tool.async_utils import NamedTemporaryFile, aio_rmfile from cve_bin_tool.file import aio_is_binary -ASSETS_PATH = Path(__file__).parent.resolve() / "assets" - class TestFile: """Tests the CVE
Bin Tool file binary checker.""" + async def _write_magic_signature(self, f, signature): + """Helper function to write a magic signature to a file.""" + await f.write(signature) + await f.seek(0) + + async def _check_file_type(self, file_type, signature, expected_result): + """Helper function to check if a file is binary based on its type.""" + async with NamedTemporaryFile("w+b", suffix=file_type, delete=False) as f: + await self._write_magic_signature(f, signature) + assert await aio_is_binary(f.name) == expected_result + await aio_rmfile(f.name) + @pytest.mark.asyncio - async def _check_test(self, file_type): + async def _check_test(self, type): """Helper function to parse a binary file and check whether the given string is in the parsed result""" - async with NamedTemporaryFile("w+b", suffix=file_type, delete=False) as f: - if file_type == "out": - # write magic signature - await f.write(b"\x7f\x45\x4c\x46\x02\x01\x01\x03\n") - await f.seek(0) - assert await aio_is_binary(f.name) - else: - await f.write(b"some other data\n") - await f.seek(0) - assert not await aio_is_binary(f.name) - await aio_rmfile(f.name) + file_signatures = { + "elf": (b"\x7f\x45\x4c\x46\x02\x01\x01\x03\n", True, ".out"), + "mach_o_32": (b"\xFE\xED\xFA\xCE\x00\x00\x00\x00", True, ".out"), + "mach_o_64": (b"\xFE\xED\xFA\xCF\x00\x00\x00\x00", True, ".out"), + "mach_o_universal": (b"\xCA\xFE\xBA\xBE\x00\x00\x00\x00", True, ".out"), + "ios_arm": (b"\xCF\xFA\xED\xFE\x00\x00\x00\x00", True, ".out"), + "wasm": (b"\x00\x61\x73\x6D\x01\x00\x00\x00", True, ".out"), + "c": (b"#include ", False, ".c"), + "single_byte": (b"1", False, ".txt"), + "windows": (b"MZ", True, ".txt"), + } + signature, expected_result, file_type = file_signatures.get( + type, (b"some other data\n", False, ".txt") + ) + await self._check_file_type(file_type, signature, expected_result) + + @pytest.mark.asyncio + async def test_binary_elf_file(self): + """file *.out""" + await self._check_test("elf") + + @pytest.mark.asyncio + 
async def test_binary_mach_o_32_file(self): + """file *.out""" + await self._check_test("mach_o_32") @pytest.mark.asyncio - async def test_binary_out_file(self): + async def test_binary_mach_o_64_file(self): """file *.out""" - await self._check_test("out") + await self._check_test("mach_o_64") + + @pytest.mark.asyncio + async def test_binary_mach_o_universal_file(self): + """file *.out""" + await self._check_test("mach_o_universal") + + @pytest.mark.asyncio + async def test_binary_ios_arm_file(self): + """file *.out""" + await self._check_test("ios_arm") + + @pytest.mark.asyncio + async def test_binary_wasm_file(self): + """file *.out""" + await self._check_test("wasm") @pytest.mark.asyncio async def test_source_file(self): """file *.c""" await self._check_test("c") + @pytest.mark.asyncio + async def test_text_file(self): + """file *.txt""" + await self._check_test("other") + @pytest.mark.asyncio async def test_single_byte_file(self): """file single-byte""" - assert not await aio_is_binary(str(ASSETS_PATH / "single-byte.txt")) + await self._check_test("single_byte") @pytest.mark.asyncio async def test_windows(self): - """file single-byte""" - assert await aio_is_binary(str(ASSETS_PATH / "windows.txt")) + """file *.txt""" + await self._check_test("windows")