Skip to content

Commit

Permalink
feat(taps): A new schema_is_valid built-in tap test validates stream schemas against the JSON Schema specification
Browse files Browse the repository at this point in the history
  • Loading branch information
edgarrmondragon committed Aug 9, 2024
1 parent 22d4eae commit bf080d9
Show file tree
Hide file tree
Showing 7 changed files with 58 additions and 23 deletions.
8 changes: 5 additions & 3 deletions singer_sdk/plugin_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from types import MappingProxyType

import click
from jsonschema import Draft7Validator

from singer_sdk import about, metrics
from singer_sdk.cli import plugin_cli
Expand All @@ -32,11 +31,14 @@
PluginCapabilities,
)
from singer_sdk.mapper import PluginMapper
from singer_sdk.typing import extend_validator_with_defaults
from singer_sdk.typing import (
DEFAULT_JSONSCHEMA_VALIDATOR,
extend_validator_with_defaults,
)

SDK_PACKAGE_NAME = "singer_sdk"

JSONSchemaValidator = extend_validator_with_defaults(Draft7Validator)
JSONSchemaValidator = extend_validator_with_defaults(DEFAULT_JSONSCHEMA_VALIDATOR)


class MapperNotInitialized(Exception):
Expand Down
7 changes: 6 additions & 1 deletion singer_sdk/sinks/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from types import MappingProxyType

import jsonschema
import jsonschema.validators
from typing_extensions import override

from singer_sdk._singerlib.json import deserialize_json
Expand All @@ -38,6 +39,7 @@
get_datelike_property_type,
handle_invalid_timestamp_in_record,
)
from singer_sdk.typing import DEFAULT_JSONSCHEMA_VALIDATOR

if t.TYPE_CHECKING:
from logging import Logger
Expand Down Expand Up @@ -88,7 +90,10 @@ def __init__(
Raises:
InvalidJSONSchema: If the schema provided from tap or mapper is invalid.
"""
jsonschema_validator = jsonschema.Draft7Validator
jsonschema_validator = jsonschema.validators.validator_for(
schema,
DEFAULT_JSONSCHEMA_VALIDATOR,
)

super().__init__(schema)
if validate_formats:
Expand Down
2 changes: 2 additions & 0 deletions singer_sdk/testing/suites.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
StreamRecordMatchesStreamSchema,
StreamRecordSchemaMatchesCatalogTest,
StreamReturnsRecordTest,
StreamSchemaIsValidTest,
TapCLIPrintsTest,
TapDiscoveryTest,
TapStreamConnectionTest,
Expand Down Expand Up @@ -72,6 +73,7 @@ class TestSuite(t.Generic[T]):
StreamRecordMatchesStreamSchema,
StreamRecordSchemaMatchesCatalogTest,
StreamReturnsRecordTest,
StreamSchemaIsValidTest,
StreamPrimaryKeysTest,
],
)
Expand Down
34 changes: 29 additions & 5 deletions singer_sdk/testing/tap_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
import typing as t
import warnings

from jsonschema import Draft7Validator
from jsonschema import validators
from jsonschema.exceptions import SchemaError

import singer_sdk.helpers._typing as th
from singer_sdk import Tap
from singer_sdk.helpers._compat import datetime_fromisoformat
from singer_sdk.typing import DEFAULT_JSONSCHEMA_VALIDATOR

from .templates import AttributeTestTemplate, StreamTestTemplate, TapTestTemplate

Expand Down Expand Up @@ -71,6 +73,28 @@ def test(self) -> None:
assert "progress_markers" not in final_state, self.message


class StreamSchemaIsValidTest(StreamTestTemplate):
    """Check that the schema declared by a stream is itself well-formed."""

    name = "schema_is_valid"

    def test(self) -> None:
        """Check the stream's schema with the validator class chosen for it.

        The validator class is looked up with ``validators.validator_for``,
        passing ``DEFAULT_JSONSCHEMA_VALIDATOR`` as the ``default`` argument,
        and its ``check_schema`` is then run on the schema.

        Raises:
            AssertionError: if schema is not valid.
        """
        stream_schema = self.stream.schema
        validator_cls = validators.validator_for(
            stream_schema,
            default=DEFAULT_JSONSCHEMA_VALIDATOR,
        )

        try:
            validator_cls.check_schema(stream_schema)
        except SchemaError as exc:  # pragma: no cover
            # Re-raise as an assertion failure, keeping the original
            # SchemaError chained as the cause.
            raise AssertionError(f"Schema is not valid: {exc}") from exc


class StreamReturnsRecordTest(StreamTestTemplate):
"""Test that a stream sync returns at least 1 record."""

Expand Down Expand Up @@ -134,10 +158,10 @@ class StreamRecordMatchesStreamSchema(StreamTestTemplate):
def test(self) -> None:
"""Run test."""
schema = self.stream.schema
validator = Draft7Validator(
schema,
format_checker=Draft7Validator.FORMAT_CHECKER,
)
default = DEFAULT_JSONSCHEMA_VALIDATOR
validator = validators.validator_for(schema, default=default)(schema)
validator.format_checker = default.FORMAT_CHECKER

for record in self.stream_records:
errors = list(validator.iter_errors(record))
error_messages = "\n".join(
Expand Down
10 changes: 6 additions & 4 deletions singer_sdk/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,6 @@
import sqlalchemy as sa
from jsonschema import ValidationError, validators

if t.TYPE_CHECKING:
from jsonschema.protocols import Validator

from singer_sdk.helpers._typing import (
JSONSCHEMA_ANNOTATION_SECRET,
JSONSCHEMA_ANNOTATION_WRITEONLY,
Expand All @@ -71,13 +68,16 @@
if t.TYPE_CHECKING:
import sys

from jsonschema.protocols import Validator

if sys.version_info >= (3, 10):
from typing import TypeAlias # noqa: ICN003
else:
from typing_extensions import TypeAlias


__all__ = [
"DEFAULT_JSONSCHEMA_VALIDATOR",
"ArrayType",
"BooleanType",
"CustomType",
Expand Down Expand Up @@ -118,11 +118,13 @@
None,
]

DEFAULT_JSONSCHEMA_VALIDATOR: type[Validator] = validators.Draft7Validator # type: ignore[assignment]

T = t.TypeVar("T", bound=_JsonValue)
P = t.TypeVar("P")


def extend_validator_with_defaults(validator_class): # noqa: ANN001, ANN201
def extend_validator_with_defaults(validator_class: type[Validator]): # noqa: ANN201
"""Fill in defaults, before validating with the provided JSON Schema Validator.
See
Expand Down
16 changes: 8 additions & 8 deletions tests/core/sinks/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ def test_validate_fastjsonschema():


@pytest.fixture
def draft7_sink_stop():
"""Return a sink object with Draft7 checks enabled."""
def default_draft_sink_stop():
"""Return a sink object with the default draft checks enabled."""

class CustomSink(BatchSinkMock):
"""Custom sink class."""
Expand All @@ -147,8 +147,8 @@ class CustomSink(BatchSinkMock):


@pytest.fixture
def draft7_sink_continue():
"""Return a sink object with Draft7 checks enabled."""
def default_draft_sink_continue():
"""Return a sink object with the default draft checks enabled."""

class CustomSink(BatchSinkMock):
"""Custom sink class."""
Expand All @@ -174,9 +174,9 @@ class CustomSink(BatchSinkMock):


def test_validate_record_jsonschema_format_checking_enabled_stop_on_error(
draft7_sink_stop,
default_draft_sink_stop,
):
sink: BatchSinkMock = draft7_sink_stop
sink: BatchSinkMock = default_draft_sink_stop

record = {
"id": 1,
Expand All @@ -195,9 +195,9 @@ def test_validate_record_jsonschema_format_checking_enabled_stop_on_error(

def test_validate_record_jsonschema_format_checking_enabled_continue_on_error(
capsys: pytest.CaptureFixture,
draft7_sink_continue,
default_draft_sink_continue,
):
sink: BatchSinkMock = draft7_sink_continue
sink: BatchSinkMock = default_draft_sink_continue

record = {
"id": 1,
Expand Down
4 changes: 2 additions & 2 deletions tests/core/test_jsonschema_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from textwrap import dedent

import pytest
from jsonschema import Draft6Validator

from singer_sdk.helpers._typing import (
JSONSCHEMA_ANNOTATION_SECRET,
Expand All @@ -27,6 +26,7 @@
)
from singer_sdk.tap_base import Tap
from singer_sdk.typing import (
DEFAULT_JSONSCHEMA_VALIDATOR,
AllOf,
AnyType,
ArrayType,
Expand Down Expand Up @@ -932,7 +932,7 @@ def test_discriminated_union():
),
)

validator = Draft6Validator(th.to_dict())
validator = DEFAULT_JSONSCHEMA_VALIDATOR(th.to_dict())

assert validator.is_valid(
{
Expand Down

0 comments on commit bf080d9

Please sign in to comment.