From 64f9e64d26f94c1f06c15c9f3e36bcc4e51b5910 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Tue, 13 Aug 2024 14:45:35 -0600 Subject: [PATCH] feat(taps): A new `schema_is_valid` built-in tap test validates stream schemas against the JSON Schema specification (#2567) --- singer_sdk/plugin_base.py | 8 ++++--- singer_sdk/sinks/core.py | 7 +++++- singer_sdk/testing/suites.py | 2 ++ singer_sdk/testing/tap_tests.py | 34 +++++++++++++++++++++++---- singer_sdk/typing.py | 10 ++++---- tests/core/sinks/test_validation.py | 16 ++++++------- tests/core/test_jsonschema_helpers.py | 4 ++-- 7 files changed, 58 insertions(+), 23 deletions(-) diff --git a/singer_sdk/plugin_base.py b/singer_sdk/plugin_base.py index f709fed95..eb5b39de3 100644 --- a/singer_sdk/plugin_base.py +++ b/singer_sdk/plugin_base.py @@ -13,7 +13,6 @@ from types import MappingProxyType import click -from jsonschema import Draft7Validator from singer_sdk import about, metrics from singer_sdk.cli import plugin_cli @@ -32,11 +31,14 @@ PluginCapabilities, ) from singer_sdk.mapper import PluginMapper -from singer_sdk.typing import extend_validator_with_defaults +from singer_sdk.typing import ( + DEFAULT_JSONSCHEMA_VALIDATOR, + extend_validator_with_defaults, +) SDK_PACKAGE_NAME = "singer_sdk" -JSONSchemaValidator = extend_validator_with_defaults(Draft7Validator) +JSONSchemaValidator = extend_validator_with_defaults(DEFAULT_JSONSCHEMA_VALIDATOR) class MapperNotInitialized(Exception): diff --git a/singer_sdk/sinks/core.py b/singer_sdk/sinks/core.py index 53533d58b..e3c1ef566 100644 --- a/singer_sdk/sinks/core.py +++ b/singer_sdk/sinks/core.py @@ -14,6 +14,7 @@ from types import MappingProxyType import jsonschema +import jsonschema.validators from typing_extensions import override from singer_sdk._singerlib.json import deserialize_json @@ -38,6 +39,7 @@ get_datelike_property_type, handle_invalid_timestamp_in_record, ) +from singer_sdk.typing import DEFAULT_JSONSCHEMA_VALIDATOR if t.TYPE_CHECKING: from logging import Logger @@ -88,7 +90,10 @@ def __init__( Raises: InvalidJSONSchema: If the schema provided from tap or mapper is invalid. """ - jsonschema_validator = jsonschema.Draft7Validator + jsonschema_validator = jsonschema.validators.validator_for( + schema, + DEFAULT_JSONSCHEMA_VALIDATOR, + ) super().__init__(schema) if validate_formats: diff --git a/singer_sdk/testing/suites.py b/singer_sdk/testing/suites.py index df93c86d2..fd53e6e19 100644 --- a/singer_sdk/testing/suites.py +++ b/singer_sdk/testing/suites.py @@ -17,6 +17,7 @@ StreamRecordMatchesStreamSchema, StreamRecordSchemaMatchesCatalogTest, StreamReturnsRecordTest, + StreamSchemaIsValidTest, TapCLIPrintsTest, TapDiscoveryTest, TapStreamConnectionTest, @@ -72,6 +73,7 @@ class TestSuite(t.Generic[T]): StreamRecordMatchesStreamSchema, StreamRecordSchemaMatchesCatalogTest, StreamReturnsRecordTest, + StreamSchemaIsValidTest, StreamPrimaryKeysTest, ], ) diff --git a/singer_sdk/testing/tap_tests.py b/singer_sdk/testing/tap_tests.py index e5b0efc42..5839e0cea 100644 --- a/singer_sdk/testing/tap_tests.py +++ b/singer_sdk/testing/tap_tests.py @@ -5,11 +5,13 @@ import typing as t import warnings -from jsonschema import Draft7Validator +from jsonschema import validators +from jsonschema.exceptions import SchemaError import singer_sdk.helpers._typing as th from singer_sdk import Tap from singer_sdk.helpers._compat import datetime_fromisoformat +from singer_sdk.typing import DEFAULT_JSONSCHEMA_VALIDATOR from .templates import AttributeTestTemplate, StreamTestTemplate, TapTestTemplate @@ -71,6 +73,28 @@ def test(self) -> None: assert "progress_markers" not in final_state, self.message +class StreamSchemaIsValidTest(StreamTestTemplate): + """Test that a stream's schema is valid.""" + + name = "schema_is_valid" + + def test(self) -> None: + """Run test. + + Raises: + AssertionError: if schema is not valid. + """ + schema = self.stream.schema + default = DEFAULT_JSONSCHEMA_VALIDATOR + validator = validators.validator_for(schema, default=default) + + try: + validator.check_schema(schema) + except SchemaError as e: # pragma: no cover + msg = f"Schema is not valid: {e}" + raise AssertionError(msg) from e + + class StreamReturnsRecordTest(StreamTestTemplate): """Test that a stream sync returns at least 1 record.""" @@ -134,10 +158,10 @@ class StreamRecordMatchesStreamSchema(StreamTestTemplate): def test(self) -> None: """Run test.""" schema = self.stream.schema - validator = Draft7Validator( - schema, - format_checker=Draft7Validator.FORMAT_CHECKER, - ) + default = DEFAULT_JSONSCHEMA_VALIDATOR + validator = validators.validator_for(schema, default=default)(schema) + validator.format_checker = default.FORMAT_CHECKER + for record in self.stream_records: errors = list(validator.iter_errors(record)) error_messages = "\n".join( diff --git a/singer_sdk/typing.py b/singer_sdk/typing.py index a8ca332a4..6bf8d9527 100644 --- a/singer_sdk/typing.py +++ b/singer_sdk/typing.py @@ -58,9 +58,6 @@ import sqlalchemy as sa from jsonschema import ValidationError, validators -if t.TYPE_CHECKING: - from jsonschema.protocols import Validator - from singer_sdk.helpers._typing import ( JSONSCHEMA_ANNOTATION_SECRET, JSONSCHEMA_ANNOTATION_WRITEONLY, @@ -71,6 +68,8 @@ if t.TYPE_CHECKING: import sys + from jsonschema.protocols import Validator + if sys.version_info >= (3, 10): from typing import TypeAlias # noqa: ICN003 else: @@ -78,6 +77,7 @@ __all__ = [ + "DEFAULT_JSONSCHEMA_VALIDATOR", "ArrayType", "BooleanType", "CustomType", @@ -118,11 +118,13 @@ None, ] +DEFAULT_JSONSCHEMA_VALIDATOR: type[Validator] = validators.Draft7Validator # type: ignore[assignment] + T = t.TypeVar("T", bound=_JsonValue) P = t.TypeVar("P") -def extend_validator_with_defaults(validator_class): # noqa: ANN001, ANN201 +def extend_validator_with_defaults(validator_class: type[Validator]): # noqa: ANN201 """Fill in defaults, before validating with the provided JSON Schema Validator. See diff --git a/tests/core/sinks/test_validation.py b/tests/core/sinks/test_validation.py index c6a05ced1..f8df7f775 100644 --- a/tests/core/sinks/test_validation.py +++ b/tests/core/sinks/test_validation.py @@ -121,8 +121,8 @@ def test_validate_fastjsonschema(): @pytest.fixture -def draft7_sink_stop(): - """Return a sink object with Draft7 checks enabled.""" +def default_draft_sink_stop(): + """Return a sink object with the default draft checks enabled.""" class CustomSink(BatchSinkMock): """Custom sink class.""" @@ -147,8 +147,8 @@ class CustomSink(BatchSinkMock): @pytest.fixture -def draft7_sink_continue(): - """Return a sink object with Draft7 checks enabled.""" +def default_draft_sink_continue(): + """Return a sink object with the default draft checks enabled.""" class CustomSink(BatchSinkMock): """Custom sink class.""" @@ -174,9 +174,9 @@ class CustomSink(BatchSinkMock): def test_validate_record_jsonschema_format_checking_enabled_stop_on_error( - draft7_sink_stop, + default_draft_sink_stop, ): - sink: BatchSinkMock = draft7_sink_stop + sink: BatchSinkMock = default_draft_sink_stop record = { "id": 1, @@ -195,9 +195,9 @@ def test_validate_record_jsonschema_format_checking_enabled_stop_on_error( def test_validate_record_jsonschema_format_checking_enabled_continue_on_error( capsys: pytest.CaptureFixture, - draft7_sink_continue, + default_draft_sink_continue, ): - sink: BatchSinkMock = draft7_sink_continue + sink: BatchSinkMock = default_draft_sink_continue record = { "id": 1, diff --git a/tests/core/test_jsonschema_helpers.py b/tests/core/test_jsonschema_helpers.py index 15a63ec2c..aeb2bae0b 100644 --- a/tests/core/test_jsonschema_helpers.py +++ b/tests/core/test_jsonschema_helpers.py @@ -8,7 +8,6 @@ from textwrap import dedent import pytest -from jsonschema import Draft6Validator from singer_sdk.helpers._typing import ( JSONSCHEMA_ANNOTATION_SECRET, @@ -27,6 +26,7 @@ ) from singer_sdk.tap_base import Tap from singer_sdk.typing import ( + DEFAULT_JSONSCHEMA_VALIDATOR, AllOf, AnyType, ArrayType, @@ -932,7 +932,7 @@ def test_discriminated_union(): ), ) - validator = Draft6Validator(th.to_dict()) + validator = DEFAULT_JSONSCHEMA_VALIDATOR(th.to_dict()) assert validator.is_valid( {