From 9226a0b7e485da3369346356cc00cbd76ed5a337 Mon Sep 17 00:00:00 2001 From: Andreu Date: Thu, 18 Jan 2024 13:08:02 +0100 Subject: [PATCH] Build a compound ID with a combination of attributes. --- README.md | 13 ++++++++++++- src/newrelic_logging/query_env.py | 5 +++++ src/newrelic_logging/salesforce.py | 16 +++++++++++++++- 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b12d3b5..cbd639b 100644 --- a/README.md +++ b/README.md @@ -184,7 +184,18 @@ Queries for `EventLogFile` requiere the following fields to be present: - `LogDate` - `LogFile` -For queries of other event types there is no minimum set of attributes requiered, but they will only be cached (when `cache_enabled` is `True`) if `Id` is present. +For queries of other event types there is no minimum set of attributes required, but a unique identifier is required to be able to store the events in Redis (when `cache_enabled` is `True`). If the `Id` field is present, it will be used. Otherwise, it will check for the `id` key in the query environment config: + +```yaml +queries: [ + { + query: "SELECT EventName, EventType, UsageType, Client, Value, StartDate, EndDate FROM PlatformEventUsageMetric ...", + id: ["Client", "Value", "StartDate", "EndDate"] + } +] +``` + +In this case, the integration will combine the fields `Client`, `Value`, `StartDate`, and `EndDate` to form a unique identifier for each event of the type `PlatformEventUsageMetric`. 
## Usage diff --git a/src/newrelic_logging/query_env.py b/src/newrelic_logging/query_env.py index c5fb13d..8e76c79 100644 --- a/src/newrelic_logging/query_env.py +++ b/src/newrelic_logging/query_env.py @@ -1,6 +1,11 @@ from .query import Query import string +# NOTE: this sandbox can be jailbroken using the trick to exec statements inside an exec block, and run an import (and other tricks): +# https://book.hacktricks.xyz/generic-methodologies-and-resources/python/bypass-python-sandboxes#operators-and-short-tricks +# https://stackoverflow.com/a/3068475/2076108 +# Would be better to use a real sandbox like https://pypi.org/project/RestrictedPython/ or https://doc.pypy.org/en/latest/sandbox.html +# or parse a small language that only supports function calls and binary expressions. def sandbox(code): __import__ = None __loader__ = None diff --git a/src/newrelic_logging/salesforce.py b/src/newrelic_logging/salesforce.py index 827ae6d..4bed3fd 100644 --- a/src/newrelic_logging/salesforce.py +++ b/src/newrelic_logging/salesforce.py @@ -9,6 +9,7 @@ import redis from requests import RequestException import copy +import hashlib from .query_env import substitute from .auth_env import Auth from .query import Query @@ -344,7 +345,7 @@ def make_single_query(self, query_obj: Query) -> Query: } query = substitute(args, query_obj.get_query(), env) query = query.replace(' ', '+') - + query_obj.set_query(query) return query_obj @@ -470,6 +471,19 @@ def pack_event_into_log(self, rows, query: Query): if self.data_cache.check_cached_id(record_id): # Record cached, skip it continue + else: + id_keys = query.get_env().get("id", []) + compound_id = "" + for key in id_keys: + compound_id = compound_id + str(row.get(key, "")) + if compound_id != "": + m = hashlib.sha3_256() + m.update(compound_id.encode('utf-8')) + row['Id'] = m.hexdigest() + record_id = row['Id'] + if self.data_cache.check_cached_id(record_id): + # Record cached, skip it + continue timestamp_attr = 
query.get_env().get("timestamp_attr", "CreatedDate") if timestamp_attr in row: