Skip to content

Commit

Permalink
feat: add secrets support for tap and target config, via `Property(..…
Browse files Browse the repository at this point in the history
…., secret=True)` (#1096)
  • Loading branch information
aaronsteers authored Oct 21, 2022
1 parent c699c72 commit 253851e
Show file tree
Hide file tree
Showing 10 changed files with 267 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class Tap{{ cookiecutter.source_name }}({{ 'SQL' if cookiecutter.stream_type ==
"auth_token",
th.StringType,
required=True,
secret=True, # Flag config as protected.
description="The token to authenticate against the API service"
),
th.Property(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class Target{{ cookiecutter.destination_name }}({{ target_class }}):
th.Property(
"sqlalchemy_url",
th.StringType,
secret=True, # Flag config as protected.
description="SQLAlchemy connection string",
),
{%- else %}
Expand All @@ -34,6 +35,12 @@ class Target{{ cookiecutter.destination_name }}({{ target_class }}):
th.StringType,
description="The scheme with which output files will be named"
),
th.Property(
"auth_token",
th.StringType,
secret=True, # Flag config as protected.
description="The path to the target output file"
),
{%- endif %}
).to_dict()

Expand Down
6 changes: 5 additions & 1 deletion docs/faq.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ However, if you're using an IDE such as VSCode, you should be able to set up the
Ensure your interpreter is set to poetry if you've followed the [Dev Guide](./dev_guide.md).
Check out this [gif](https://visualstudiomagazine.com/articles/2021/04/20/~/media/ECG/visualstudiomagazine/Images/2021/04/poetry.ashx) to see how to change your interpreter.

## I'm having trouble getting the base class to __init__.
### Handling credentials and other secrets in config

As of SDK version `0.13.0`, developers can pass `secret=True` to the `Property` class constructor to flag secrets such as API tokens and passwords. We recommend that all developers use this option where applicable, so that orchestrators can take this designation into account when deciding how to store the user's provided config.

## I'm having trouble getting the base class to `__init__`.

Ensure you're using the `super()` method to inherit methods from the base class.

Expand Down
2 changes: 1 addition & 1 deletion samples/sample_tap_gitlab/gitlab_tap.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class SampleTapGitlab(Tap):

name: str = "sample-tap-gitlab"
config_jsonschema = PropertiesList(
Property("auth_token", StringType, required=True),
Property("auth_token", StringType, required=True, secret=True),
Property("project_ids", ArrayType(StringType), required=True),
Property("group_ids", ArrayType(StringType), required=True),
Property("start_date", DateTimeType, required=True),
Expand Down
2 changes: 1 addition & 1 deletion samples/sample_tap_google_analytics/ga_tap.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class SampleTapGoogleAnalytics(Tap):
config_jsonschema = PropertiesList(
Property("view_id", StringType(), required=True),
Property("client_email", StringType(), required=True),
Property("private_key", StringType(), required=True),
Property("private_key", StringType(), required=True, secret=True),
).to_dict()

def discover_streams(self) -> List[SampleGoogleAnalyticsStream]:
Expand Down
87 changes: 87 additions & 0 deletions singer_sdk/helpers/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

_MAX_TIMESTAMP = "9999-12-31 23:59:59.999999"
_MAX_TIME = "23:59:59.999999"
JSONSCHEMA_ANNOTATION_SECRET = "secret"
JSONSCHEMA_ANNOTATION_WRITEONLY = "writeOnly"


class DatetimeErrorTreatmentEnum(Enum):
Expand Down Expand Up @@ -54,6 +56,36 @@ def append_type(type_dict: dict, new_type: str) -> dict:
)


def is_secret_type(type_dict: dict) -> bool:
    """Return True if JSON Schema type definition appears to be a secret.

    A node is treated as a secret when either its `writeOnly` or its `secret`
    annotation is truthy, or when any entry under its `properties` key
    (checked recursively) is a secret.

    An empty `type_dict` carries neither annotation and has no subproperties,
    so it is reported as not secret rather than raising an error.

    Args:
        type_dict: The JSON Schema type to check.

    Returns:
        True if we detect any sensitive property nodes, otherwise False.
    """
    if type_dict.get(JSONSCHEMA_ANNOTATION_WRITEONLY) or type_dict.get(
        JSONSCHEMA_ANNOTATION_SECRET
    ):
        return True

    # Recursively check subproperties; a node without a "properties" key has
    # no children to inspect, so `any()` over the empty default yields False.
    return any(
        is_secret_type(child_type_dict)
        for child_type_dict in type_dict.get("properties", {}).values()
    )


def is_object_type(property_schema: dict) -> Optional[bool]:
"""Return true if the JSON Schema type is an object or None if detection fails."""
if "anyOf" not in property_schema and "type" not in property_schema:
Expand Down Expand Up @@ -86,6 +118,34 @@ def is_datetime_type(type_dict: dict) -> bool:
)


def is_date_or_datetime_type(type_dict: dict) -> bool:
    """Return True if JSON Schema type definition is a 'date'/'date-time' type.

    Also returns True if type is nested within an 'anyOf' type Array.

    Args:
        type_dict: The JSON Schema definition.

    Raises:
        ValueError: If the definition (or an 'anyOf' member that gets
            inspected) has neither an 'anyOf' nor a 'type' key.

    Returns:
        True if date or date-time, else False.
    """
    if "anyOf" in type_dict:
        # Use a distinct loop name so the `type_dict` argument is not
        # shadowed; `any()` stops at the first matching member.
        return any(
            is_date_or_datetime_type(member) for member in type_dict["anyOf"]
        )

    if "type" in type_dict:
        # Only the "format" annotation distinguishes dates from other values.
        return type_dict.get("format") in {"date", "date-time"}

    raise ValueError(
        f"Could not detect type of replication key using schema '{type_dict}'"
    )


def get_datelike_property_type(property_schema: Dict) -> Optional[str]:
"""Return one of 'date-time', 'time', or 'date' if property is date-like.
Expand Down Expand Up @@ -152,6 +212,23 @@ def is_string_array_type(type_dict: dict) -> bool:
return "array" in type_dict["type"] and bool(is_string_type(type_dict["items"]))


def is_array_type(type_dict: dict) -> bool:
    """Return True if JSON Schema type definition is an array of any item type.

    Also returns True if an array type is nested within an 'anyOf' list.

    Args:
        type_dict: The JSON Schema definition.

    Raises:
        ValueError: If `type_dict` is empty, or if it has neither an 'anyOf'
            nor a 'type' key.

    Returns:
        True if the definition declares an array type, else False.
    """
    if not type_dict:
        raise ValueError(
            "Could not detect type from empty type_dict. "
            "Did you forget to define a property in the stream schema?"
        )

    if "anyOf" in type_dict:
        # Generator form short-circuits on the first array member instead of
        # building an intermediate list.
        return any(is_array_type(member) for member in type_dict["anyOf"])

    if "type" not in type_dict:
        raise ValueError(f"Could not detect type from schema '{type_dict}'")

    # "type" may be a single string or a list such as ["array", "null"].
    return "array" in type_dict["type"]


def is_boolean_type(property_schema: dict) -> Optional[bool]:
"""Return true if the JSON Schema type is a boolean or None if detection fails."""
if "anyOf" not in property_schema and "type" not in property_schema:
Expand All @@ -162,6 +239,16 @@ def is_boolean_type(property_schema: dict) -> Optional[bool]:
return False


def is_integer_type(property_schema: dict) -> Optional[bool]:
    """Return true if the JSON Schema type is an integer or None if detection fails.

    Args:
        property_schema: The JSON Schema definition of the property.

    Returns:
        None if the schema has neither an 'anyOf' nor a 'type' key; True if
        the 'type' value (or any 'anyOf' member) is an integer type; otherwise
        False. When 'anyOf' is present it takes precedence over 'type'.
    """
    if "anyOf" not in property_schema and "type" not in property_schema:
        return None  # Could not detect data type

    candidates = property_schema.get("anyOf", [property_schema.get("type")])
    for candidate in candidates:
        if isinstance(candidate, dict):
            # 'anyOf' members are sub-schemas, so inspect each one's own type
            # rather than testing the dict's keys for the word "integer".
            if is_integer_type(candidate):
                return True
        elif "integer" in candidate:
            # Covers both a plain "integer" string and a list of type names
            # such as ["integer", "null"].
            return True
    return False


def is_string_type(property_schema: dict) -> Optional[bool]:
"""Return true if the JSON Schema type is a boolean or None if detection fails."""
if "anyOf" not in property_schema and "type" not in property_schema:
Expand Down
4 changes: 2 additions & 2 deletions singer_sdk/plugin_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
)

import click
from jsonschema import Draft4Validator, SchemaError, ValidationError
from jsonschema import Draft7Validator, SchemaError, ValidationError

from singer_sdk import metrics
from singer_sdk.configuration._dict_config import parse_environment_config
Expand All @@ -42,7 +42,7 @@
SDK_PACKAGE_NAME = "singer_sdk"


JSONSchemaValidator = extend_validator_with_defaults(Draft4Validator)
JSONSchemaValidator = extend_validator_with_defaults(Draft7Validator)


class PluginBase(metaclass=abc.ABCMeta):
Expand Down
6 changes: 3 additions & 3 deletions singer_sdk/sinks/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from typing import IO, Any, Mapping, Sequence

from dateutil import parser
from jsonschema import Draft4Validator, FormatChecker
from jsonschema import Draft7Validator, FormatChecker

from singer_sdk.helpers._batch import (
BaseBatchFileEncoding,
Expand All @@ -29,7 +29,7 @@
)
from singer_sdk.plugin_base import PluginBase

JSONSchemaValidator = Draft4Validator
JSONSchemaValidator = Draft7Validator


class Sink(metaclass=abc.ABCMeta):
Expand Down Expand Up @@ -80,7 +80,7 @@ def __init__(
self._batch_records_read: int = 0
self._batch_dupe_records_merged: int = 0

self._validator = Draft4Validator(schema, format_checker=FormatChecker())
self._validator = Draft7Validator(schema, format_checker=FormatChecker())

def _get_context(self, record: dict) -> dict:
"""Return an empty dictionary by default.
Expand Down
23 changes: 22 additions & 1 deletion singer_sdk/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,12 @@
from jsonschema import validators

from singer_sdk.helpers._classproperty import classproperty
from singer_sdk.helpers._typing import append_type, get_datelike_property_type
from singer_sdk.helpers._typing import (
JSONSCHEMA_ANNOTATION_SECRET,
JSONSCHEMA_ANNOTATION_WRITEONLY,
append_type,
get_datelike_property_type,
)

if sys.version_info >= (3, 10):
from typing import TypeAlias
Expand Down Expand Up @@ -352,21 +357,30 @@ def __init__(
required: bool = False,
default: _JsonValue = None,
description: str = None,
secret: bool = False,
) -> None:
"""Initialize Property object.
Note: Properties containing secrets should be specified with `secret=True`.
Doing so will add the annotation `writeOnly=True`, in accordance with JSON
Schema Draft 7 and later, and `secret=True` as an additional hint to readers.
More info: https://json-schema.org/draft-07/json-schema-release-notes.html
Args:
name: Property name.
wrapped: JSON Schema type of the property.
required: Whether this is a required property.
default: Default value in the JSON Schema.
description: Long-text property description.
secret: True if this is a credential or other secret.
"""
self.name = name
self.wrapped = wrapped
self.optional = not required
self.default = default
self.description = description
self.secret = secret

@property
def type_dict(self) -> dict: # type: ignore # OK: @classproperty vs @property
Expand Down Expand Up @@ -402,6 +416,13 @@ def to_dict(self) -> dict:
type_dict.update({"default": self.default})
if self.description:
type_dict.update({"description": self.description})
if self.secret:
type_dict.update(
{
JSONSCHEMA_ANNOTATION_SECRET: True,
JSONSCHEMA_ANNOTATION_WRITEONLY: True,
}
)
return {self.name: type_dict}


Expand Down
Loading

0 comments on commit 253851e

Please sign in to comment.