def find_function_constants(function_name):
    """
    Collect the integer constants used as instruction operands inside the function `function_name`.

    :param function_name: The name of the function.
    :type function_name: str
    :return: every int/long operand value found in the function body, converted to a string
    :rtype: list[str]
    """
    try:
        target = getGlobalFunctions(function_name)[0]
    except (IndexError, TypeError):
        print("Error: Function {} not found.".format(function_name))
        return []
    if target is None:
        return []

    found = []
    address_set = target.getBody()
    for instr in currentProgram.getListing().getInstructions(address_set, True):
        operand_count = instr.getNumOperands()
        for operand_index in range(operand_count):
            for op_object in instr.getOpObjects(operand_index):
                try:
                    candidate = op_object.getValue()
                except AttributeError:
                    # operand objects that expose no getValue() are skipped
                    continue
                # isinstance() is False for None, so no separate None check is needed;
                # `long` is kept because this script runs under Ghidra's Jython (Python 2)
                if isinstance(candidate, (int, long)):
                    found.append(str(candidate))
    return found
def _convert_version_str(self, version_str: str, meta_dict: dict) -> str:
    """
    Rewrite a matched version string with the substitution stored in the signature metadata.

    The metadata entry "_sub_regex" can be used to change the version string if it does not have the expected
    format (e.g. add dots). The entry should contain a regex and replacement for `re.sub()` as JSON string,
    i.e. a JSON array with exactly two string elements: `[pattern, replacement]`.

    :param version_str: the version string as matched by the version regex
    :param meta_dict: the signature metadata; must contain the key "_sub_regex"
    :return: the converted version string or an empty string if the "_sub_regex" entry is unusable
    """
    try:
        sub_regex, replacement = json.loads(meta_dict['_sub_regex'])
        return re.sub(sub_regex, replacement, version_str)
    except (ValueError, TypeError, re.error):
        # ValueError: invalid JSON (json.JSONDecodeError is a subclass) or not exactly two elements to unpack
        # TypeError: the decoded value is not iterable or its elements are not strings
        # re.error: the pattern does not compile or the replacement references an unknown group
        # A broken signature must not abort the analysis, so all of them yield "no version".
        logging.warning(f'[{self.NAME}]: signature has invalid substitution regex: {meta_dict}')
        return ''
remove the "empty" result versions.remove('') result['meta']['version'] = list(versions) diff --git a/src/plugins/analysis/software_components/internal/resolve_version_format_string.py b/src/plugins/analysis/software_components/internal/resolve_version_format_string.py index bb5f300b6..03ce9a6cc 100644 --- a/src/plugins/analysis/software_components/internal/resolve_version_format_string.py +++ b/src/plugins/analysis/software_components/internal/resolve_version_format_string.py @@ -2,7 +2,6 @@ import json import logging -import re from contextlib import suppress from pathlib import Path from tempfile import TemporaryDirectory @@ -40,11 +39,7 @@ def extract_data_from_ghidra(file_path: str, input_data: dict, path: str) -> lis try: output_file = (tmp_dir_path / DOCKER_OUTPUT_FILE).read_text() - return filter_implausible_results(json.loads(output_file)) + return json.loads(output_file) except (json.JSONDecodeError, FileNotFoundError): logging.debug('[FSR]: output file could not be read') return [] - - -def filter_implausible_results(version_list: list[str]): - return [version for version in version_list if re.search(r'\d\.\d', version)] diff --git a/src/plugins/analysis/software_components/signatures/crypto.yara b/src/plugins/analysis/software_components/signatures/crypto.yara index 145bab09d..0edfad924 100644 --- a/src/plugins/analysis/software_components/signatures/crypto.yara +++ b/src/plugins/analysis/software_components/signatures/crypto.yara @@ -1,3 +1,16 @@ +rule mbed_TLS { + meta: + software_name = "mbed TLS" + open_source = true + website = "https://github.com/Mbed-TLS/mbedtls" + description = "embedded library for cryptography, X.509 certificate manipulation and the SSL/TLS and DTLS protocols" + strings: + // see https://github.com/Mbed-TLS/mbedtls/blob/b6860cf7f9f4be0cc60f36909f6a5887008fb408/include/mbedtls/build_info.h#L38 + $a = /mbed TLS \d+\.\d+\.\d+/ ascii + condition: + $a and no_text_file +} + rule OpenSSL { meta: @@ -24,4 +37,3 @@ rule SSLeay 
condition: $a and no_text_file } - diff --git a/src/plugins/analysis/software_components/signatures/software.yara b/src/plugins/analysis/software_components/signatures/software.yara index b149a1522..746c44850 100644 --- a/src/plugins/analysis/software_components/signatures/software.yara +++ b/src/plugins/analysis/software_components/signatures/software.yara @@ -54,6 +54,41 @@ rule jQuery $a } +rule file_libmagic { + meta: + software_name = "file" + open_source = true + website = "https://www.darwinsys.com/file/" + description = "file type guesser" + // versions are stored as decimal int with three digits + // (first digit: major version, remaining two digits: minor version) + version_regex = "\\d{3}" + _version_function = "magic_version" + // sub XYY -> X.YY + _sub_regex = "[\"(\\\\d)(\\\\d{2})\", \"\\\\1.\\\\2\"]" + strings: + // see https://github.com/file/file/blob/f7d05cade99ff4819b4de70445511037000f6b14/src/magic.c#L607 + $a = "magic_version" nocase ascii + condition: + $a and no_text_file +} + +rule OPKG { + meta: + software_name = "OPKG" + open_source = true + website = "https://openwrt.org/docs/guide-user/additional-software/opkg" + description = "Opkg lightweight embedded package manager" + version_regex = "[0-9a-z]{40} \\(\\d{4}-\\d{2}-\\d{2}\\)" + strings: + // the version is not stored as a number; instead a git commit hash and a date is used: [hash] ([YYYY-MM-DD]) + // see https://github.com/openwrt/opkg-lede/blob/38eccbb1fd694d4798ac1baf88f9ba83d1eac616/src/opkg-cl.c#L158 + $a = "opkg version %s\n" nocase ascii + $b = /[0-9a-z]{40} \(\d{4}-\d{2}-\d{2}\)/ ascii + condition: + $a and $b and no_text_file +} + rule Perl { meta: diff --git a/src/plugins/analysis/software_components/test/data/software_component_test_list.txt b/src/plugins/analysis/software_components/test/data/software_component_test_list.txt index 8a7b2a1e8..177b7abea 100644 --- a/src/plugins/analysis/software_components/test/data/software_component_test_list.txt +++ 
b/src/plugins/analysis/software_components/test/data/software_component_test_list.txt @@ -65,6 +65,7 @@ libpcap version 1.5.2 libsqlite3-3.8.11.1.so libupnp-1.6.18 lighttpd-1.4.18 +mbed TLS 2.16.3 nc -h for help netatalk-2.2.0 nginx version: nginx/1.13.3 diff --git a/src/plugins/analysis/software_components/test/data/version_function_constant.elf b/src/plugins/analysis/software_components/test/data/version_function_constant.elf new file mode 100755 index 000000000..a0b1dbb16 Binary files /dev/null and b/src/plugins/analysis/software_components/test/data/version_function_constant.elf differ diff --git a/src/plugins/analysis/software_components/test/test_plugin_software_components.py b/src/plugins/analysis/software_components/test/test_plugin_software_components.py index 439532383..4147a9db3 100644 --- a/src/plugins/analysis/software_components/test/test_plugin_software_components.py +++ b/src/plugins/analysis/software_components/test/test_plugin_software_components.py @@ -51,6 +51,7 @@ def test_process_object(self, analysis_plugin): ('OpenSSL 0.9.8zh', '0.9.8zh', {'version_regex': '\\d\\.\\d\\.\\d[a-z]{0,2}'}), ('Foo v1.2.3', 'v1.2.3', {'version_regex': 'v?\\d\\.\\d\\.\\d'}), ('Bar a.b', 'a.b', {'version_regex': '[a-z]\\.[a-z]'}), + ('524', '5.24', {'version_regex': r'\d{3}', '_sub_regex': '["(\\\\d)(\\\\d{2})", "\\\\1.\\\\2"]'}), ], ) def test_get_version(self, analysis_plugin, version, expected_output, meta_dict): diff --git a/src/plugins/analysis/software_components/test/test_resolve_version_format_string.py b/src/plugins/analysis/software_components/test/test_resolve_version_format_string.py index 0dcfd2af5..78f5d5803 100644 --- a/src/plugins/analysis/software_components/test/test_resolve_version_format_string.py +++ b/src/plugins/analysis/software_components/test/test_resolve_version_format_string.py @@ -4,7 +4,6 @@ from ..internal.resolve_version_format_string import ( extract_data_from_ghidra, - filter_implausible_results, ) @@ -14,29 +13,22 @@ ( 
'format_string_arm-linux-gnueabihf', {'mode': 'format_string', 'key_string_list': ['get_version v%s']}, - ['1.2.3'], + '1.2.3', ), ( 'fake-liblzma', {'mode': 'version_function', 'function_name': 'lzma_version_string'}, - ['5.2.1'], + '5.2.1', + ), + ( + 'version_function_constant.elf', + {'mode': 'version_function', 'function_name': 'get_version'}, + '524', ), ], ) def test_extract_data_from_ghidra(backend_config, test_file, input_data, expected_output): test_file = Path(__file__).parent / 'data' / test_file result = extract_data_from_ghidra(str(test_file), input_data, str(backend_config.docker_mount_base_dir)) - assert len(result) == 1 - assert result == expected_output - - -@pytest.mark.parametrize( - ('test_input', 'expected_output'), - [ - ([], []), - (['1.2.3.4', 'foobar'], ['1.2.3.4']), - (['v1.2-r1234'], ['v1.2-r1234']), - ], -) -def test_filter_implausible_results(test_input, expected_output): - assert filter_implausible_results(test_input) == expected_output + assert len(result) >= 1 + assert result[0] == expected_output