diff --git a/cookiecutter/tap-template/{{cookiecutter.tap_id}}/{{cookiecutter.library_name}}/tap.py b/cookiecutter/tap-template/{{cookiecutter.tap_id}}/{{cookiecutter.library_name}}/tap.py index 42c3f8ab5..e41c337d7 100644 --- a/cookiecutter/tap-template/{{cookiecutter.tap_id}}/{{cookiecutter.library_name}}/tap.py +++ b/cookiecutter/tap-template/{{cookiecutter.tap_id}}/{{cookiecutter.library_name}}/tap.py @@ -38,6 +38,7 @@ class Tap{{ cookiecutter.source_name }}({{ 'SQL' if cookiecutter.stream_type == "auth_token", th.StringType, required=True, + secret=True, # Flag config as protected. description="The token to authenticate against the API service" ), th.Property( diff --git a/cookiecutter/target-template/{{cookiecutter.target_id}}/{{cookiecutter.library_name}}/target.py b/cookiecutter/target-template/{{cookiecutter.target_id}}/{{cookiecutter.library_name}}/target.py index b6976b337..35b4a4675 100644 --- a/cookiecutter/target-template/{{cookiecutter.target_id}}/{{cookiecutter.library_name}}/target.py +++ b/cookiecutter/target-template/{{cookiecutter.target_id}}/{{cookiecutter.library_name}}/target.py @@ -21,6 +21,7 @@ class Target{{ cookiecutter.destination_name }}({{ target_class }}): th.Property( "sqlalchemy_url", th.StringType, + secret=True, # Flag config as protected. description="SQLAlchemy connection string", ), {%- else %} @@ -34,6 +35,12 @@ class Target{{ cookiecutter.destination_name }}({{ target_class }}): th.StringType, description="The scheme with which output files will be named" ), + th.Property( + "auth_token", + th.StringType, + secret=True, # Flag config as protected. + description="The path to the target output file" + ), {%- endif %} ).to_dict() diff --git a/docs/faq.md b/docs/faq.md index e0b18c5e0..953cef072 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -11,7 +11,11 @@ However, if you're using an IDE such as VSCode, you should be able to set up the Ensure your interpreter is set to poetry if you've followed the [Dev Guide](./dev_guide.md). 
Checkout this [gif](https://visualstudiomagazine.com/articles/2021/04/20/~/media/ECG/visualstudiomagazine/Images/2021/04/poetry.ashx) for how to change your interpreter. -## I'm having trouble getting the base class to __init__. +### Handling credentials and other secrets in config + +As of SDK version `0.13.0`, developers can use the `secret=True` indication in the `Property` class constructor to flag secrets such as API tokens and passwords. We recommend all developers use this option where applicable so that orchestrators may consider this designation when determining how to store the user's provided config. + +## I'm having trouble getting the base class to `__init__`. Ensure you're using the `super()` method to inherit methods from the base class. diff --git a/samples/sample_tap_gitlab/gitlab_tap.py b/samples/sample_tap_gitlab/gitlab_tap.py index 6bdb04fb0..0f02697df 100644 --- a/samples/sample_tap_gitlab/gitlab_tap.py +++ b/samples/sample_tap_gitlab/gitlab_tap.py @@ -34,7 +34,7 @@ class SampleTapGitlab(Tap): name: str = "sample-tap-gitlab" config_jsonschema = PropertiesList( - Property("auth_token", StringType, required=True), + Property("auth_token", StringType, required=True, secret=True), Property("project_ids", ArrayType(StringType), required=True), Property("group_ids", ArrayType(StringType), required=True), Property("start_date", DateTimeType, required=True), diff --git a/samples/sample_tap_google_analytics/ga_tap.py b/samples/sample_tap_google_analytics/ga_tap.py index 8044df0b2..76fd952b2 100644 --- a/samples/sample_tap_google_analytics/ga_tap.py +++ b/samples/sample_tap_google_analytics/ga_tap.py @@ -24,7 +24,7 @@ class SampleTapGoogleAnalytics(Tap): config_jsonschema = PropertiesList( Property("view_id", StringType(), required=True), Property("client_email", StringType(), required=True), - Property("private_key", StringType(), required=True), + Property("private_key", StringType(), required=True, secret=True), ).to_dict() def discover_streams(self) -> 
# JSON Schema annotation keys used to flag a property as sensitive.
# `writeOnly` is the standard JSON Schema (Draft 7+) annotation; `secret` is an
# additional, non-standard hint emitted alongside it.
JSONSCHEMA_ANNOTATION_SECRET = "secret"
JSONSCHEMA_ANNOTATION_WRITEONLY = "writeOnly"


def is_secret_type(type_dict: dict) -> bool:
    """Return True if JSON Schema type definition appears to be a secret.

    Will return true if either `writeOnly` or `secret` are true on this type
    or any of the type's subproperties. Only the `properties` mapping is
    descended into; an empty `type_dict` is simply reported as not secret.

    Args:
        type_dict: The JSON Schema type to check.

    Returns:
        True if we detect any sensitive property nodes.
    """
    if type_dict.get(JSONSCHEMA_ANNOTATION_WRITEONLY) or type_dict.get(
        JSONSCHEMA_ANNOTATION_SECRET
    ):
        return True

    if "properties" in type_dict:
        # Recursively check subproperties and return True if any child is secret.
        return any(
            is_secret_type(child_type_dict)
            for child_type_dict in type_dict["properties"].values()
        )

    return False


def is_date_or_datetime_type(type_dict: dict) -> bool:
    """Return True if JSON Schema type definition is a 'date'/'date-time' type.

    Also returns True if type is nested within an 'anyOf' type Array.

    Args:
        type_dict: The JSON Schema definition.

    Raises:
        ValueError: If the schema (or an inspected 'anyOf' option) has neither
            an 'anyOf' nor a 'type' key.

    Returns:
        True if date or date-time, else False.
    """
    if "anyOf" in type_dict:
        # Use a distinct name for each option so the parameter is not rebound.
        return any(is_date_or_datetime_type(option) for option in type_dict["anyOf"])

    if "type" in type_dict:
        return type_dict.get("format") in {"date", "date-time"}

    raise ValueError(
        f"Could not detect type of replication key using schema '{type_dict}'"
    )


def is_array_type(type_dict: dict) -> bool:
    """Return True if JSON Schema type definition is an array.

    Args:
        type_dict: The JSON Schema definition.

    Raises:
        ValueError: If `type_dict` is empty, or has neither an 'anyOf' nor a
            'type' key.

    Returns:
        True if the type (or any inspected 'anyOf' option) is an array.
    """
    if not type_dict:
        raise ValueError(
            "Could not detect type from empty type_dict. "
            "Did you forget to define a property in the stream schema?"
        )

    if "anyOf" in type_dict:
        # Generator form: stops at the first array option instead of first
        # materializing a list of every result.
        return any(is_array_type(option) for option in type_dict["anyOf"])

    if "type" not in type_dict:
        raise ValueError(f"Could not detect type from schema '{type_dict}'")

    return "array" in type_dict["type"]


def is_integer_type(property_schema: dict) -> Optional[bool]:
    """Return true if the JSON Schema type is an integer.

    Args:
        property_schema: The JSON Schema definition.

    Returns:
        True if the type, or any 'anyOf' option, is an integer; False if a type
        was found but is not an integer; None if detection fails because the
        schema has neither an 'anyOf' nor a 'type' key.
    """
    if "anyOf" not in property_schema and "type" not in property_schema:
        return None  # Could not detect data type

    # 'anyOf' takes precedence over 'type' when both are present.
    options = property_schema.get("anyOf", [property_schema.get("type")])
    for option in options:
        if isinstance(option, dict):
            # 'anyOf' options are schemas themselves: inspect their own type
            # instead of testing the dict's keys for the word "integer".
            if is_integer_type(option):
                return True
        elif "integer" in option:
            # Covers both a bare "integer" string and a list of type names.
            return True
    return False
boolean or None if detection fails.""" if "anyOf" not in property_schema and "type" not in property_schema: diff --git a/singer_sdk/plugin_base.py b/singer_sdk/plugin_base.py index 6e1908b72..0d4ba30db 100644 --- a/singer_sdk/plugin_base.py +++ b/singer_sdk/plugin_base.py @@ -21,7 +21,7 @@ ) import click -from jsonschema import Draft4Validator, SchemaError, ValidationError +from jsonschema import Draft7Validator, SchemaError, ValidationError from singer_sdk import metrics from singer_sdk.configuration._dict_config import parse_environment_config @@ -42,7 +42,7 @@ SDK_PACKAGE_NAME = "singer_sdk" -JSONSchemaValidator = extend_validator_with_defaults(Draft4Validator) +JSONSchemaValidator = extend_validator_with_defaults(Draft7Validator) class PluginBase(metaclass=abc.ABCMeta): diff --git a/singer_sdk/sinks/core.py b/singer_sdk/sinks/core.py index 186ca28b2..aafa864c2 100644 --- a/singer_sdk/sinks/core.py +++ b/singer_sdk/sinks/core.py @@ -13,7 +13,7 @@ from typing import IO, Any, Mapping, Sequence from dateutil import parser -from jsonschema import Draft4Validator, FormatChecker +from jsonschema import Draft7Validator, FormatChecker from singer_sdk.helpers._batch import ( BaseBatchFileEncoding, @@ -29,7 +29,7 @@ ) from singer_sdk.plugin_base import PluginBase -JSONSchemaValidator = Draft4Validator +JSONSchemaValidator = Draft7Validator class Sink(metaclass=abc.ABCMeta): @@ -80,7 +80,7 @@ def __init__( self._batch_records_read: int = 0 self._batch_dupe_records_merged: int = 0 - self._validator = Draft4Validator(schema, format_checker=FormatChecker()) + self._validator = Draft7Validator(schema, format_checker=FormatChecker()) def _get_context(self, record: dict) -> dict: """Return an empty dictionary by default. 
diff --git a/singer_sdk/typing.py b/singer_sdk/typing.py index e373c2113..d39072ac3 100644 --- a/singer_sdk/typing.py +++ b/singer_sdk/typing.py @@ -48,7 +48,12 @@ from jsonschema import validators from singer_sdk.helpers._classproperty import classproperty -from singer_sdk.helpers._typing import append_type, get_datelike_property_type +from singer_sdk.helpers._typing import ( + JSONSCHEMA_ANNOTATION_SECRET, + JSONSCHEMA_ANNOTATION_WRITEONLY, + append_type, + get_datelike_property_type, +) if sys.version_info >= (3, 10): from typing import TypeAlias @@ -352,21 +357,30 @@ def __init__( required: bool = False, default: _JsonValue = None, description: str = None, + secret: bool = False, ) -> None: """Initialize Property object. + Note: Properties containing secrets should be specified with `secret=True`. + Doing so will add the annotation `writeOnly=True`, in accordance with JSON + Schema Draft 7 and later, and `secret=True` as an additional hint to readers. + + More info: https://json-schema.org/draft-07/json-schema-release-notes.html + Args: name: Property name. wrapped: JSON Schema type of the property. required: Whether this is a required property. default: Default value in the JSON Schema. description: Long-text property description. + secret: True if this is a credential or other secret. 
""" self.name = name self.wrapped = wrapped self.optional = not required self.default = default self.description = description + self.secret = secret @property def type_dict(self) -> dict: # type: ignore # OK: @classproperty vs @property @@ -402,6 +416,13 @@ def to_dict(self) -> dict: type_dict.update({"default": self.default}) if self.description: type_dict.update({"description": self.description}) + if self.secret: + type_dict.update( + { + JSONSCHEMA_ANNOTATION_SECRET: True, + JSONSCHEMA_ANNOTATION_WRITEONLY: True, + } + ) return {self.name: type_dict} diff --git a/tests/core/test_jsonschema_helpers.py b/tests/core/test_jsonschema_helpers.py index 175d0b577..9b731af16 100644 --- a/tests/core/test_jsonschema_helpers.py +++ b/tests/core/test_jsonschema_helpers.py @@ -1,10 +1,25 @@ """Test sample sync.""" +from __future__ import annotations + import re -from typing import List +from typing import Callable, List import pytest +from singer_sdk.helpers._typing import ( + JSONSCHEMA_ANNOTATION_SECRET, + JSONSCHEMA_ANNOTATION_WRITEONLY, + is_array_type, + is_boolean_type, + is_date_or_datetime_type, + is_datetime_type, + is_integer_type, + is_object_type, + is_secret_type, + is_string_array_type, + is_string_type, +) from singer_sdk.streams.core import Stream from singer_sdk.tap_base import Tap from singer_sdk.typing import ( @@ -35,6 +50,17 @@ UUIDType, ) +TYPE_FN_CHECKS: set[Callable] = { + is_array_type, + is_boolean_type, + is_date_or_datetime_type, + is_datetime_type, + is_integer_type, + is_secret_type, + is_string_array_type, + is_string_type, +} + class ConfigTestTap(Tap): """Test tap class.""" @@ -43,11 +69,11 @@ class ConfigTestTap(Tap): config_jsonschema = PropertiesList( Property("host", StringType, required=True), Property("username", StringType, required=True), - Property("password", StringType, required=True), + Property("password", StringType, required=True, secret=True), Property("batch_size", IntegerType, default=-1), ).to_dict() - def 
@pytest.mark.parametrize(
    "property_obj,expected_jsonschema,type_fn_checks_true",
    [
        (
            Property("my_prop1", StringType, required=True),
            {"my_prop1": {"type": ["string"]}},
            {is_string_type},
        ),
        (
            Property("my_prop2", StringType, required=False),
            {"my_prop2": {"type": ["string", "null"]}},
            {is_string_type},
        ),
        (
            Property("my_prop3", StringType, secret=True),
            {
                "my_prop3": {
                    "type": ["string", "null"],
                    JSONSCHEMA_ANNOTATION_SECRET: True,
                    JSONSCHEMA_ANNOTATION_WRITEONLY: True,
                }
            },
            {is_secret_type, is_string_type},
        ),
        (
            Property("my_prop4", StringType, description="This is a property."),
            {
                "my_prop4": {
                    "description": "This is a property.",
                    "type": ["string", "null"],
                }
            },
            {is_string_type},
        ),
        (
            Property("my_prop5", StringType, default="some_val"),
            {
                "my_prop5": {
                    "default": "some_val",
                    "type": ["string", "null"],
                }
            },
            {is_string_type},
        ),
        (
            Property("my_prop6", ArrayType(StringType)),
            {
                "my_prop6": {
                    "type": ["array", "null"],
                    "items": {"type": ["string"]},
                }
            },
            {is_array_type, is_string_array_type},
        ),
        (
            Property(
                "my_prop7",
                ObjectType(
                    Property("not_a_secret", StringType),
                    Property("is_a_secret", StringType, secret=True),
                ),
            ),
            {
                "my_prop7": {
                    "type": ["object", "null"],
                    "properties": {
                        "not_a_secret": {"type": ["string", "null"]},
                        "is_a_secret": {
                            "type": ["string", "null"],
                            "secret": True,
                            "writeOnly": True,
                        },
                    },
                }
            },
            {is_object_type, is_secret_type},
        ),
        (
            Property("my_prop8", IntegerType),
            {
                "my_prop8": {
                    "type": ["integer", "null"],
                }
            },
            {is_integer_type},
        ),
    ],
)
def test_property_creation(
    property_obj: Property,
    expected_jsonschema: dict,
    type_fn_checks_true: set[Callable],
) -> None:
    """Check a Property's JSON Schema output and how the type helpers classify it.

    Args:
        property_obj: The property under test.
        expected_jsonschema: The exact dict `property_obj.to_dict()` must return.
        type_fn_checks_true: The type-check helpers expected to return True for
            the generated schema node; every other helper in `TYPE_FN_CHECKS`
            is expected to return False.
    """
    property_dict = property_obj.to_dict()
    assert property_dict == expected_jsonschema

    # `to_dict()` yields a single `{name: schema}` entry; fetch the schema node
    # once instead of on every loop iteration.
    property_node = next(iter(property_dict.values()))

    # Union with the expected-true set so that a helper named in the parameters
    # but absent from TYPE_FN_CHECKS (e.g. `is_object_type`) is still asserted
    # rather than silently skipped.
    for check_fn in TYPE_FN_CHECKS | type_fn_checks_true:
        expected = check_fn in type_fn_checks_true
        assert (
            check_fn(property_node) is expected
        ), f"{check_fn.__name__} was not {expected} for {property_dict!r}"