Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

more software signatures #1301

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions src/plugins/analysis/ipc/docker/ipc_analyzer/ipc_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,38 @@ def find_function_ref_strings(function_name):
return strings


def find_function_constants(function_name):
"""
Get all constants that are used as operands in the function with name `function_name`.

:param function_name: The name of the function.
:type function_name: str
:return: a list of int/long constants referenced in the function as strings
:rtype: list[str]
"""
try:
function = getGlobalFunctions(function_name)[0]
except (IndexError, TypeError):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does this raise a type error?

print("Error: Function {} not found.".format(function_name))
return []

constants = []
if function is not None:
body = function.getBody()
instruction_iterator = currentProgram.getListing().getInstructions(body, True)

for instruction in instruction_iterator:
for i in range(instruction.getNumOperands()):
for operand in instruction.getOpObjects(i):
try:
value = operand.getValue()
except AttributeError:
continue
if value is not None and isinstance(value, (int, long)):
constants.append(str(value))
return constants
Comment on lines +355 to +368
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I personally prefer less indentation, which in this case can be achieved by adding:

if function is None:
    return []

Also: Why can function be none if we did not get an exception in the call to getGlobalFunctions?!
I think it cannot.



def get_fstring_from_functions(ghidra_analysis, key_string, call_args, called_fstrings):
"""
:param ghidra_analysis: instance of GhidraAnalysis
Expand Down Expand Up @@ -412,6 +444,7 @@ def find_version_strings(input_data, ghidra_analysis, result_path):
print("Error: Function name not found.")
return 1
result_list = find_function_ref_strings(function_name)
result_list.extend(find_function_constants(function_name))
else:
print("Error: Invalid mode.")
return 1
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

import json
import logging
import re
import string
from typing import TYPE_CHECKING
Expand Down Expand Up @@ -50,9 +52,26 @@ def get_version(self, input_string: str, meta_dict: dict) -> str:
pattern = re.compile(regex)
version = pattern.search(input_string)
if version is not None:
return self._strip_leading_zeroes(version.group(0))
version_string = version.group(0)
if '_sub_regex' in meta_dict:
version_string = self._convert_version_str(version_string, meta_dict)
else:
version_string = self._strip_leading_zeroes(version_string)
return version_string
return ''

def _convert_version_str(self, version_str: str, meta_dict: dict) -> str:
    """
    Rewrite a matched version string with the substitution stored in the signature metadata.

    The metadata entry "_sub_regex" can be used to change the version string if it does not have the expected
    format (e.g. add dots). The entry should contain a regex and replacement for `re.sub()` as JSON string,
    i.e. a JSON array with exactly two string elements: `[pattern, replacement]`.

    :param version_str: The version string as it was matched in the input.
    :param meta_dict: The signature metadata; must contain the key `_sub_regex`.
    :return: The converted version string or an empty string if the `_sub_regex` entry is unusable.
    """
    try:
        sub_regex, replacement = json.loads(meta_dict['_sub_regex'])
        return re.sub(sub_regex, replacement, version_str)
    except (ValueError, TypeError, re.error):
        # ValueError: malformed JSON (json.JSONDecodeError is a subclass) or an array that does not have
        #             exactly two elements
        # TypeError:  the JSON value is not an array or its elements are not strings
        # re.error:   the pattern does not compile or the replacement references an unknown group
        logging.warning(f'[{self.NAME}]: signature has invalid substitution regex: {meta_dict}')
        return ''
Comment on lines +63 to +73
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function should not take the meta_dict but rather the regex as parameter.
Also, storing a Python regex as a JSON-encoded array in a YARA file seems a bit too complex to me (as can be seen from the number of backslashes).
What do you think about formatting it in two separate fields:

rule file_libmagic {
	meta:
		software_name = "file"
		// versions are stored as decimal int with three digits
		// (first digit: major version, remaining two digits: minor version)
		version_regex = "\\d{3}"
		_version_function = "magic_version"
                _sub_regex = "(\\d)(\\d{2})"
                _sub_replacement = "\\1.\\2"
                ...
}

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, what is the difference between the version_regex and the _sub_regex?
Could (should?) they be merged?

Copy link
Collaborator Author

@jstucke jstucke Dec 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

version_regex is for reading in the version from the matched string (if it doesn't have the default 1.2.3 form) and _sub_regex (short for substitution regex) is something new that I had to think of, because file/libmagic came with a version in the form XYY (e.g. 524) and there is no way to read this in as X.YY without it. _sub_regex and _sub_replacement are the inputs for re.sub()

Why is XYY not enough in this case? Because it is stored as X.YY in the CVE data and we cannot match it otherwise


@staticmethod
def _get_summary(results: dict) -> list[str]:
summary = set()
Expand Down Expand Up @@ -86,9 +105,12 @@ def get_version_for_component(self, result, file_object: FileObject):
'mode': 'version_function',
'function_name': result['meta']['_version_function'],
}
versions.update(
extract_data_from_ghidra(file_object.file_path, input_data, config.backend.docker_mount_base_dir)
ghidra_data = extract_data_from_ghidra(
file_object.file_path, input_data, config.backend.docker_mount_base_dir
)
for version_str in ghidra_data:
if version := self.get_version(version_str, result['meta']):
versions.add(version)
if '' in versions and len(versions) > 1: # if there are actual version results, remove the "empty" result
versions.remove('')
result['meta']['version'] = list(versions)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import json
import logging
import re
from contextlib import suppress
from pathlib import Path
from tempfile import TemporaryDirectory
Expand Down Expand Up @@ -40,11 +39,7 @@ def extract_data_from_ghidra(file_path: str, input_data: dict, path: str) -> lis

try:
output_file = (tmp_dir_path / DOCKER_OUTPUT_FILE).read_text()
return filter_implausible_results(json.loads(output_file))
return json.loads(output_file)
except (json.JSONDecodeError, FileNotFoundError):
logging.debug('[FSR]: output file could not be read')
return []


def filter_implausible_results(version_list: list[str]):
return [version for version in version_list if re.search(r'\d\.\d', version)]
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ rule OPKG {
open_source = true
website = "https://openwrt.org/docs/guide-user/additional-software/opkg"
description = "Opkg lightweight embedded package manager"
// the version is not stored as a number; instead a git commit hash and a date is used: [hash] ([YYYY-MM-DD])
version_regex = "[0-9a-z]{40} \\(\\d{4}-\\d{2}-\\d{2}\\)"
strings:
// the version is not stored as a number; instead a git commit hash and a date is used: [hash] ([YYYY-MM-DD])
// see https://github.com/openwrt/opkg-lede/blob/38eccbb1fd694d4798ac1baf88f9ba83d1eac616/src/opkg-cl.c#L158
$a = "opkg version %s\n" nocase ascii
$b = /[0-9a-z]{40} \(\d{4}-\d{2}-\d{2}\)/ ascii
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def test_process_object(self, analysis_plugin):
('OpenSSL 0.9.8zh', '0.9.8zh', {'version_regex': '\\d\\.\\d\\.\\d[a-z]{0,2}'}),
('Foo v1.2.3', 'v1.2.3', {'version_regex': 'v?\\d\\.\\d\\.\\d'}),
('Bar a.b', 'a.b', {'version_regex': '[a-z]\\.[a-z]'}),
('524', '5.24', {'version_regex': r'\d{3}', '_sub_regex': '["(\\\\d)(\\\\d{2})", "\\\\1.\\\\2"]'}),
],
)
def test_get_version(self, analysis_plugin, version, expected_output, meta_dict):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from ..internal.resolve_version_format_string import (
extract_data_from_ghidra,
filter_implausible_results,
)


Expand All @@ -14,29 +13,22 @@
(
'format_string_arm-linux-gnueabihf',
{'mode': 'format_string', 'key_string_list': ['get_version v%s']},
['1.2.3'],
'1.2.3',
),
(
'fake-liblzma',
{'mode': 'version_function', 'function_name': 'lzma_version_string'},
['5.2.1'],
'5.2.1',
),
(
'version_function_constant.elf',
{'mode': 'version_function', 'function_name': 'get_version'},
'524',
),
],
)
def test_extract_data_from_ghidra(backend_config, test_file, input_data, expected_output):
test_file = Path(__file__).parent / 'data' / test_file
result = extract_data_from_ghidra(str(test_file), input_data, str(backend_config.docker_mount_base_dir))
assert len(result) == 1
assert result == expected_output


@pytest.mark.parametrize(
('test_input', 'expected_output'),
[
([], []),
(['1.2.3.4', 'foobar'], ['1.2.3.4']),
(['v1.2-r1234'], ['v1.2-r1234']),
],
)
def test_filter_implausible_results(test_input, expected_output):
assert filter_implausible_results(test_input) == expected_output
assert len(result) >= 1
assert result[0] == expected_output
Loading