Skip to content

Commit

Permalink
feat: added the capability to extract version constants to the FSR
Browse files Browse the repository at this point in the history
and also added a substitution option for software signatures to change the format (e.g. add dots)
  • Loading branch information
jstucke committed Dec 5, 2024
1 parent 8c5522c commit 08eb5b6
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 27 deletions.
33 changes: 33 additions & 0 deletions src/plugins/analysis/ipc/docker/ipc_analyzer/ipc_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,38 @@ def find_function_ref_strings(function_name):
return strings


def find_function_constants(function_name):
    """
    Collect the integer constants that appear as instruction operands inside the function `function_name`.

    Every operand object of every instruction in the function body is inspected; objects that expose a
    `getValue()` method returning an int/long contribute that value (converted to `str`) to the result.
    Duplicates are kept and the order follows the instruction order of the listing.

    :param function_name: The name of the function.
    :type function_name: str
    :return: the int/long constants used in the function, as strings (empty if the function is not found)
    :rtype: list[str]
    """
    try:
        function = getGlobalFunctions(function_name)[0]
    except (IndexError, TypeError):
        print("Error: Function {} not found.".format(function_name))
        return []

    if function is None:
        return []

    function_body = function.getBody()
    listing = currentProgram.getListing()

    found = []
    for instruction in listing.getInstructions(function_body, True):
        for operand_index in range(instruction.getNumOperands()):
            for op_object in instruction.getOpObjects(operand_index):
                try:
                    candidate = op_object.getValue()
                except AttributeError:
                    # operand objects without getValue() cannot carry a constant
                    continue
                # isinstance() is already False for None, so no separate None check is needed
                if isinstance(candidate, (int, long)):
                    found.append(str(candidate))
    return found


def get_fstring_from_functions(ghidra_analysis, key_string, call_args, called_fstrings):
"""
:param ghidra_analysis: instance of GhidraAnalysis
Expand Down Expand Up @@ -412,6 +444,7 @@ def find_version_strings(input_data, ghidra_analysis, result_path):
print("Error: Function name not found.")
return 1
result_list = find_function_ref_strings(function_name)
result_list.extend(find_function_constants(function_name))
else:
print("Error: Invalid mode.")
return 1
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from __future__ import annotations

import json
import re
import string
from typing import TYPE_CHECKING

from sqlalchemy.testing.plugin.plugin_base import logging

import config
from analysis.YaraPluginBase import YaraBasePlugin
from helperFunctions.data_conversion import make_unicode_string
Expand Down Expand Up @@ -50,9 +53,26 @@ def get_version(self, input_string: str, meta_dict: dict) -> str:
pattern = re.compile(regex)
version = pattern.search(input_string)
if version is not None:
return self._strip_leading_zeroes(version.group(0))
version_string = version.group(0)
if '_sub_regex' in meta_dict:
version_string = self._convert_version_str(version_string, meta_dict)
else:
version_string = self._strip_leading_zeroes(version_string)
return version_string
return ''

def _convert_version_str(self, version_str: str, meta_dict: dict) -> str:
    """
    Apply the substitution from the signature's "_sub_regex" metadata entry to a version string.

    The metadata entry "_sub_regex" can be used to change the version string if it does not have the expected
    format (e.g. add dots). The entry should contain a regex and replacement for `re.sub()` as JSON string,
    i.e. a JSON array with exactly two strings: `["<pattern>", "<replacement>"]`.

    :param version_str: the version string that was matched by the version regex
    :param meta_dict: the metadata of the signature; must contain the key "_sub_regex"
    :return: the converted version string or an empty string if the "_sub_regex" entry is unusable
    """
    try:
        sub_regex, replacement = json.loads(meta_dict['_sub_regex'])
        return re.sub(sub_regex, replacement, version_str)
    except (ValueError, TypeError, re.error):
        # ValueError: invalid JSON (json.JSONDecodeError is a subclass) or not exactly two elements
        # TypeError: the entry is not a JSON array or its elements are not strings
        # re.error: the pattern or a group reference in the replacement is invalid
        logging.warning(f'[{self.NAME}]: signature has invalid substitution regex: {meta_dict}')
        return ''

@staticmethod
def _get_summary(results: dict) -> list[str]:
summary = set()
Expand Down Expand Up @@ -86,9 +106,12 @@ def get_version_for_component(self, result, file_object: FileObject):
'mode': 'version_function',
'function_name': result['meta']['_version_function'],
}
versions.update(
extract_data_from_ghidra(file_object.file_path, input_data, config.backend.docker_mount_base_dir)
ghidra_data = extract_data_from_ghidra(
file_object.file_path, input_data, config.backend.docker_mount_base_dir
)
for version_str in ghidra_data:
if version := self.get_version(version_str, result['meta']):
versions.add(version)
if '' in versions and len(versions) > 1: # if there are actual version results, remove the "empty" result
versions.remove('')
result['meta']['version'] = list(versions)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import json
import logging
import re
from contextlib import suppress
from pathlib import Path
from tempfile import TemporaryDirectory
Expand Down Expand Up @@ -40,11 +39,7 @@ def extract_data_from_ghidra(file_path: str, input_data: dict, path: str) -> lis

try:
output_file = (tmp_dir_path / DOCKER_OUTPUT_FILE).read_text()
return filter_implausible_results(json.loads(output_file))
return json.loads(output_file)
except (json.JSONDecodeError, FileNotFoundError):
logging.debug('[FSR]: output file could not be read')
return []


def filter_implausible_results(version_list: list[str]):
    """
    Drop all entries that do not look like a version.

    An entry is kept if it contains a digit, directly followed by a dot, directly followed by another digit
    (e.g. "1.2" in "v1.2-r1234"). The order of the kept entries is unchanged.
    """
    plausible = []
    for candidate in version_list:
        if re.search(r'\d\.\d', candidate) is None:
            continue
        plausible.append(candidate)
    return plausible
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ rule OPKG {
open_source = true
website = "https://openwrt.org/docs/guide-user/additional-software/opkg"
description = "Opkg lightweight embedded package manager"
// the version is not stored as a number; instead a git commit hash and a date is used: [hash] ([YYYY-MM-DD])
version_regex = "[0-9a-z]{40} \\(\\d{4}-\\d{2}-\\d{2}\\)"
strings:
// the version is not stored as a number; instead a git commit hash and a date is used: [hash] ([YYYY-MM-DD])
// see https://github.com/openwrt/opkg-lede/blob/38eccbb1fd694d4798ac1baf88f9ba83d1eac616/src/opkg-cl.c#L158
$a = "opkg version %s\n" nocase ascii
$b = /[0-9a-z]{40} \(\d{4}-\d{2}-\d{2}\)/ ascii
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def test_process_object(self, analysis_plugin):
('OpenSSL 0.9.8zh', '0.9.8zh', {'version_regex': '\\d\\.\\d\\.\\d[a-z]{0,2}'}),
('Foo v1.2.3', 'v1.2.3', {'version_regex': 'v?\\d\\.\\d\\.\\d'}),
('Bar a.b', 'a.b', {'version_regex': '[a-z]\\.[a-z]'}),
('524', '5.24', {'version_regex': r'\d{3}', '_sub_regex': '["(\\\\d)(\\\\d{2})", "\\\\1.\\\\2"]'}),
],
)
def test_get_version(self, analysis_plugin, version, expected_output, meta_dict):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from ..internal.resolve_version_format_string import (
extract_data_from_ghidra,
filter_implausible_results,
)


Expand All @@ -14,29 +13,22 @@
(
'format_string_arm-linux-gnueabihf',
{'mode': 'format_string', 'key_string_list': ['get_version v%s']},
['1.2.3'],
'1.2.3',
),
(
'fake-liblzma',
{'mode': 'version_function', 'function_name': 'lzma_version_string'},
['5.2.1'],
'5.2.1',
),
(
'version_function_constant.elf',
{'mode': 'version_function', 'function_name': 'get_version'},
'524',
),
],
)
def test_extract_data_from_ghidra(backend_config, test_file, input_data, expected_output):
test_file = Path(__file__).parent / 'data' / test_file
result = extract_data_from_ghidra(str(test_file), input_data, str(backend_config.docker_mount_base_dir))
assert len(result) == 1
assert result == expected_output


@pytest.mark.parametrize(
    ('test_input', 'expected_output'),
    [
        ([], []),
        (['1.2.3.4', 'foobar'], ['1.2.3.4']),
        (['v1.2-r1234'], ['v1.2-r1234']),
    ],
)
def test_filter_implausible_results(test_input, expected_output):
    """Check that only entries containing a "<digit>.<digit>" sequence survive the filter."""
    filtered = filter_implausible_results(test_input)
    assert filtered == expected_output
assert len(result) >= 1
assert result[0] == expected_output

0 comments on commit 08eb5b6

Please sign in to comment.