From 3ed9f0d8bc1bf4b3725cd15ccd842a37a70a0127 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= Date: Fri, 14 Jul 2023 16:29:20 -0600 Subject: [PATCH 1/5] fix: Safely ignore parsing record field as datetime if it is missing in schema --- singer_sdk/sinks/core.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/singer_sdk/sinks/core.py b/singer_sdk/sinks/core.py index ec43c4060..cf275a550 100644 --- a/singer_sdk/sinks/core.py +++ b/singer_sdk/sinks/core.py @@ -358,12 +358,16 @@ def _parse_timestamps_in_record( schema: TODO treatment: TODO """ - for key in record: - datelike_type = get_datelike_property_type(schema["properties"][key]) + for key, value in record.items(): + try: + datelike_type = get_datelike_property_type(schema["properties"][key]) + except KeyError: + self.logger.debug("No schema for record field '%s'", key) + continue if datelike_type: - date_val = record[key] + date_val = value try: - if record[key] is not None: + if value is not None: date_val = parser.parse(date_val) except parser.ParserError as ex: date_val = handle_invalid_timestamp_in_record( From ddec1ca3f5c160eb1f205091d4a4bea824f9fa52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= Date: Mon, 25 Sep 2023 11:11:31 -0600 Subject: [PATCH 2/5] Add test --- tests/core/sinks/test_validation.py | 44 +++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 tests/core/sinks/test_validation.py diff --git a/tests/core/sinks/test_validation.py b/tests/core/sinks/test_validation.py new file mode 100644 index 000000000..cdef95e41 --- /dev/null +++ b/tests/core/sinks/test_validation.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +import datetime + +from tests.conftest import BatchSinkMock, TargetMock + + +def test_validate_record(): + target = TargetMock() + sink = BatchSinkMock( + target, + "users", + { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "created_at": {"type": "string", "format": "date-time"}, + }, + }, + ["id"], + ) + + record_message = { + "type": "RECORD", + "stream": "users", + "record": { + "id": 1, + "created_at": "2021-01-01T00:00:00+00:00", + "missing_datetime": "2021-01-01T00:00:00+00:00", + }, + "time_extracted": "2021-01-01T00:00:00+00:00", + "version": 100, + } + record = record_message["record"] + updated_record = sink._validate_and_parse(record) + assert updated_record["created_at"] == datetime.datetime( + 2021, + 1, + 1, + 0, + 0, + tzinfo=datetime.timezone.utc, + ) + assert updated_record["missing_datetime"] == "2021-01-01T00:00:00+00:00" From c85d4e1c25f6c866a8b0775f6cebf9ea542dc80c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= Date: Tue, 26 Sep 2023 16:44:27 -0600 Subject: [PATCH 3/5] Check if key is in schema instead of try-except inside loop --- singer_sdk/sinks/core.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/singer_sdk/sinks/core.py b/singer_sdk/sinks/core.py index 4cf0b1e20..d21dee43e 100644 --- a/singer_sdk/sinks/core.py +++ b/singer_sdk/sinks/core.py @@ -366,11 +366,10 @@ def _parse_timestamps_in_record( treatment: TODO """ for key, value in record.items(): - try: - datelike_type = get_datelike_property_type(schema["properties"][key]) - except KeyError: + if key not in schema["properties"]: self.logger.debug("No schema for record field '%s'", key) continue + datelike_type = get_datelike_property_type(schema["properties"][key]) if datelike_type: date_val = value try: From 5e6a427e5d37c8f09ae92db167d81c39da58bb95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= Date: Tue, 26 Sep 2023 18:45:09 -0600 Subject: [PATCH 4/5] Test invalid datetime --- tests/conftest.py | 2 ++ tests/core/sinks/test_validation.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 7e7c39958..cb392bd3a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,6 +12,7 @@ from singer_sdk import SQLConnector from singer_sdk import typing as th +from singer_sdk.helpers._typing import DatetimeErrorTreatmentEnum from singer_sdk.helpers.capabilities import PluginCapabilities from singer_sdk.sinks import BatchSink, SQLSink from singer_sdk.target_base import SQLTarget, Target @@ -75,6 +76,7 @@ class BatchSinkMock(BatchSink): """A mock Sink class.""" name = "batch-sink-mock" + datetime_error_treatment = DatetimeErrorTreatmentEnum.MAX def __init__( self, diff --git a/tests/core/sinks/test_validation.py b/tests/core/sinks/test_validation.py index cdef95e41..489174ba5 100644 --- a/tests/core/sinks/test_validation.py +++ b/tests/core/sinks/test_validation.py @@ -15,6 +15,7 @@ def test_validate_record(): "properties": { "id": {"type": "integer"}, "created_at": {"type": "string", "format": "date-time"}, + "invalid_datetime": {"type": "string", "format": "date-time"}, }, }, ["id"], @@ -27,6 +28,7 @@ def test_validate_record(): "id": 1, "created_at": "2021-01-01T00:00:00+00:00", "missing_datetime": "2021-01-01T00:00:00+00:00", + "invalid_datetime": "not a datetime", }, "time_extracted": "2021-01-01T00:00:00+00:00", "version": 100, @@ -42,3 +44,4 @@ def test_validate_record(): tzinfo=datetime.timezone.utc, ) assert updated_record["missing_datetime"] == "2021-01-01T00:00:00+00:00" + assert updated_record["invalid_datetime"] == "9999-12-31 23:59:59.999999" From 6fa9cf4a28d3d5f97e9faa35b47b5b2461daec5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= Date: Thu, 28 Sep 2023 09:14:33 -0600 Subject: [PATCH 5/5] Simplify test --- singer_sdk/sinks/core.py | 2 +- tests/core/sinks/test_validation.py | 18 ++++++------------ 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/singer_sdk/sinks/core.py b/singer_sdk/sinks/core.py index d21dee43e..e578c2084 100644 --- a/singer_sdk/sinks/core.py +++ b/singer_sdk/sinks/core.py @@ -367,7 +367,7 @@ def _parse_timestamps_in_record( """ for key, value in record.items(): if key not in schema["properties"]: - self.logger.debug("No schema for record field '%s'", key) + self.logger.warning("No schema for record field '%s'", key) continue datelike_type = get_datelike_property_type(schema["properties"][key]) if datelike_type: diff --git a/tests/core/sinks/test_validation.py b/tests/core/sinks/test_validation.py index 489174ba5..5a7ca39a3 100644 --- a/tests/core/sinks/test_validation.py +++ b/tests/core/sinks/test_validation.py @@ -21,20 +21,14 @@ def test_validate_record(): ["id"], ) - record_message = { - "type": "RECORD", - "stream": "users", - "record": { - "id": 1, - "created_at": "2021-01-01T00:00:00+00:00", - "missing_datetime": "2021-01-01T00:00:00+00:00", - "invalid_datetime": "not a datetime", - }, - "time_extracted": "2021-01-01T00:00:00+00:00", - "version": 100, + record = { + "id": 1, + "created_at": "2021-01-01T00:00:00+00:00", + "missing_datetime": "2021-01-01T00:00:00+00:00", + "invalid_datetime": "not a datetime", } - record = record_message["record"] updated_record = sink._validate_and_parse(record) + assert updated_record["created_at"] == datetime.datetime( 2021, 1,