Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/BuzzCutNorman/sdk into 1626…
Browse files Browse the repository at this point in the history
…-configurable-batch-size-and-max-wait-limit-for-targets
  • Loading branch information
BuzzCutNorman committed Feb 2, 2024
2 parents 17f143c + 9600a04 commit a0e34c9
Show file tree
Hide file tree
Showing 67 changed files with 1,018 additions and 695 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/cookiecutter-e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
fail-fast: true
matrix:
include:
- { python-version: "3.11", os: "ubuntu-latest" }
- { python-version: "3.12", os: "ubuntu-latest" }

steps:
- uses: actions/checkout@v4
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ jobs:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
sqlalchemy: ["2"]
include:
- { session: tests, python-version: "3.11", os: "ubuntu-latest", sqlalchemy: "1" }
- { session: doctest, python-version: "3.11", os: "ubuntu-latest", sqlalchemy: "2" }
- { session: mypy, python-version: "3.11", os: "ubuntu-latest", sqlalchemy: "2" }
- { session: tests, python-version: "3.12", os: "ubuntu-latest", sqlalchemy: "1" }
- { session: doctest, python-version: "3.12", os: "ubuntu-latest", sqlalchemy: "2" }
- { session: mypy, python-version: "3.12", os: "ubuntu-latest", sqlalchemy: "2" }

steps:
- uses: actions/checkout@v4
Expand Down Expand Up @@ -109,7 +109,7 @@ jobs:
runs-on: ubuntu-latest
if: ${{ !github.event.pull_request.head.repo.fork }}
env:
NOXPYTHON: "3.11"
NOXPYTHON: "3.12"
NOXSESSION: tests
SAMPLE_TAP_GITLAB_AUTH_TOKEN: ${{ secrets.SAMPLE_TAP_GITLAB_AUTH_TOKEN }}
SAMPLE_TAP_GITLAB_GROUP_IDS: ${{ secrets.SAMPLE_TAP_GITLAB_GROUP_IDS }}
Expand Down Expand Up @@ -180,7 +180,7 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: '3.11'
python-version: '3.12'
cache: 'pip'
cache-dependency-path: 'poetry.lock'

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/version_bump.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:

- uses: actions/setup-python@v5
with:
python-version: "3.11"
python-version: "3.12"
architecture: x64

- name: Bump version
Expand Down
2 changes: 2 additions & 0 deletions cookiecutter/mapper-template/cookiecutter.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"mapper_id": "mapper-{{ cookiecutter.name.lower() }}",
"library_name": "{{ cookiecutter.mapper_id.replace('-', '_') }}",
"variant": "None (Skip)",
"faker_extra": false,
"include_ci_files": ["GitHub", "None (Skip)"],
"license": ["Apache-2.0"],
"ide": ["VSCode", "None"],
Expand All @@ -14,6 +15,7 @@
"admin_email": "Provide your [bold yellow]email[/]",
"mapper_id": "The ID of the tap, in kebab-case",
"library_name": "The name of the library, in snake_case. This is how the library will be imported in Python.",
"faker_extra": "Add [bold orange1][link=https://faker.readthedocs.io/en/master/]Faker[/link][/] as an extra dependency to support generating fake data in stream maps?",
"include_ci_files": "Whether to include CI files for a common CI services",
"license": "The license for the project",
"ide": "Add configuration files for your preferred IDE"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ packages = [

[tool.poetry.dependencies]
python = ">=3.8"
singer-sdk = { version="~=0.34.1" }
singer-sdk = { version="~=0.34.1"{{ ', extras = ["faker"]' if cookiecutter.faker_extra }} }
fs-s3fs = { version = "~=1.1.1", optional = true }

[tool.poetry.group.dev.dependencies]
Expand Down
2 changes: 2 additions & 0 deletions cookiecutter/tap-template/cookiecutter.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"JWT",
"Custom or N/A"
],
"faker_extra": false,
"include_ci_files": ["GitHub", "None"],
"license": ["Apache-2.0", "None"],
"ide": ["VSCode", "None"],
Expand All @@ -25,6 +26,7 @@
"library_name": "The name of the library, in snake_case. This is how the library will be imported in Python.",
"stream_type": "The type of stream the source provides",
"auth_method": "The [bold red]authentication[/] method used by the source, for REST and GraphQL sources",
"faker_extra": "Add [bold orange1][link=https://faker.readthedocs.io/en/master/]Faker[/link][/] as an extra dependency to support generating fake data in stream maps?",
"include_ci_files": "Whether to include CI files for a common CI services",
"license": "The license for the project",
"ide": "Add configuration files for your preferred IDE"
Expand Down
3 changes: 3 additions & 0 deletions cookiecutter/tap-template/hooks/post_gen_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
for client_py in PACKAGE_PATH.rglob("*-client.py"):
client_py.unlink()

if "{{ cookiecutter.stream_type }}" != "REST":
shutil.rmtree(PACKAGE_PATH.joinpath("schemas"), ignore_errors=True)

if "{{ cookiecutter.auth_method }}" not in ("OAuth2", "JWT"):
PACKAGE_PATH.joinpath("auth.py").unlink()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ packages = [
[tool.poetry.dependencies]
python = ">=3.8"
importlib-resources = { version = "==6.1.*", python = "<3.9" }
singer-sdk = { version="~=0.34.1" }
singer-sdk = { version="~=0.34.1"{{ ', extras = ["faker"]' if cookiecutter.faker_extra }} }
fs-s3fs = { version = "~=1.1.1", optional = true }
{%- if cookiecutter.stream_type in ["REST", "GraphQL"] %}
requests = "~=2.31.0"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""JSON schema files for the REST API."""
2 changes: 2 additions & 0 deletions cookiecutter/target-template/cookiecutter.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"library_name": "{{ cookiecutter.target_id.replace('-', '_') }}",
"variant": "None (Skip)",
"serialization_method": ["Per record", "Per batch", "SQL"],
"faker_extra": false,
"include_ci_files": ["GitHub", "None (Skip)"],
"license": ["Apache-2.0"],
"ide": ["VSCode", "None"],
Expand All @@ -16,6 +17,7 @@
"mapper_id": "The ID of the tap, in kebab-case",
"library_name": "The name of the library, in snake_case. This is how the library will be imported in Python.",
"serialization_method": "The serialization method to use for loading data",
"faker_extra": "Add [bold orange1][link=https://faker.readthedocs.io/en/master/]Faker[/link][/] as an extra dependency to support generating fake data in stream maps?",
"include_ci_files": "Whether to include CI files for a common CI services",
"license": "The license for the project",
"ide": "Add configuration files for your preferred IDE"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ packages = [

[tool.poetry.dependencies]
python = ">=3.8"
singer-sdk = { version="~=0.34.1" }
singer-sdk = { version="~=0.34.1"{{ ', extras = ["faker"]' if cookiecutter.faker_extra }} }
fs-s3fs = { version = "~=1.1.1", optional = true }
{%- if cookiecutter.serialization_method != "SQL" %}
requests = "~=2.31.0"
Expand Down
2 changes: 1 addition & 1 deletion docs/batch.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ AWS S3

### `encoding`

The `encoding` field is used to specify the format and compression of the batch files. Currently only `jsonl` and `gzip` are supported, respectively.
The `encoding` field is used to specify the format and compression of the batch files. Currently `jsonl`, `gzip` and `parquet` are supported.

### `manifest`

Expand Down
8 changes: 8 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
"sphinx.ext.napoleon",
"sphinx.ext.autosectionlabel",
"sphinx.ext.autosummary",
"sphinx.ext.intersphinx",
"sphinx_copybutton",
"myst_parser",
"sphinx_reredirects",
Expand Down Expand Up @@ -127,3 +128,10 @@
redirects = {
"porting.html": "guides/porting.html",
}

# -- Options for intersphinx -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html#configuration
intersphinx_mapping = {
"requests": ("https://requests.readthedocs.io/en/latest/", None),
"python": ("https://docs.python.org/3/", None),
}
9 changes: 9 additions & 0 deletions docs/dev_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,15 @@ Some APIs instead return the records as values inside an object where each key i
]
```

## Extra features

The following [extra features](https://packaging.python.org/en/latest/specifications/dependency-specifiers/#extras) are available for the Singer SDK:

- `faker` - Enables the use of [Faker](https://faker.readthedocs.io/en/master/) in [stream maps](stream_maps.md).
- `s3` - Enables AWS S3 as a [BATCH storage](batch.md#the-batch-message).
- `parquet` - Enables Parquet as a [BATCH encoding](batch.md#encoding).
- `testing` - Pytest dependencies required to use the [Tap & Target Testing Framework](testing.md).

## Resources

### Detailed Class Reference
Expand Down
47 changes: 29 additions & 18 deletions docs/stream_maps.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,23 +80,13 @@ These capabilities are all out of scope _by design_:
a transformation tool like [dbt](https://www.getdbt.com), or (b) create a custom mapper
plugin with inline lookup logic.

### A feature for all Singer users, enabled by the SDK
## A feature for all Singer users, enabled by the SDK

The mapping features described here are create for the **_users_** of SDK-based taps and targets.
The mapping features described here are created for the **_users_** of SDK-based taps and targets, which support inline transformations with `stream_maps` and `stream_map_config` out-of-box.

Developers simply enable the feature using the instructions below, and then users can benefit from having inline transformation capabilities out-of-box on their favorite taps and targets.
**Note:** to support non-SDK taps and targets, the standalone inline mapper plugin [`meltano-map-transformer`](https://hub.meltano.com/mappers/meltano-map-transformer/) follows all specifications defined here and can apply mapping transformations between _any_ Singer tap and target, even if they are not built using the SDK.

**Note:** to support non-SDK taps and targets, we are also creating a standalone inline mapper plugin (`meltano-map-transform`), which follows all specifications defined here and can apply mapping transformations between _any_ Singer tap and target, even if they are not built using the SDK.

## Enabling Stream Maps in SDK-Based Plugins

To support inline mapping functions, the developer only needs to declare two plugin settings,
called `stream_maps` and `stream_map_config`, and declare both settings as `object` type. (For example:
`Property("stream_maps, ObjectType())` if using the python helper classes or
`"stream_maps": {"type": "object"}` if using native JSON Schema declarations.)

If the `stream_maps` setting is detected, the following behaviors will be implemented
by the SDK automatically:
The following behaviors are implemented by the SDK automatically:

1. For taps, the SCHEMA and RECORD messages will automatically be transformed,
duplicated, filtered, or aliased, as per the `stream_maps` config settings _after_
Expand All @@ -108,7 +98,7 @@ by the SDK automatically:
setting _prior_ to any Sink processing functions.
- This means that the target developer can assume that all streams and records are
transformed, aliased, filtered, etc. _before_ any custom target code is executed.
3. The upcoming standalone mapper plugin (`meltano-map-transform`) is a hybrid tap/target which
3. The standalone mapper plugin [`meltano-map-transformer`](https://hub.meltano.com/mappers/meltano-map-transformer/) is a hybrid tap/target which
simply receives input from a tap, transforms all stream and schema messages via the
`stream_maps` config option, and then emits the resulting stream(s) to a downstream
target.
Expand All @@ -122,8 +112,7 @@ by the SDK automatically:

The `stream_maps` config expects a mapping of stream names to a structured transform object.

Here is a sample `stream_maps` transformation which removes all references to `email` and
adds `email_domain` and `email_hash` as new properties:
Here is a sample `stream_maps` transformation which obfuscates `phone_number` with a fake value, removes all references to `email` and adds `email_domain` and `email_hash` as new properties:

`meltano.yml` or `config.json`:

Expand All @@ -138,9 +127,18 @@ stream_maps:
email_domain: owner_email.split('@')[-1]
# for uniqueness checks
email_hash: md5(config['hash_seed'] + owner_email)
# generate a fake phone number
phone_number: fake.phone_number()
stream_map_config:
# hash outputs are not able to be replicated without the original seed:
hash_seed: 01AWZh7A6DzGm6iJZZ2T
faker_config:
# set specific seed
seed: 0
# set specific locales
locale:
- en_US
- en_GB
```
````

Expand All @@ -151,11 +149,19 @@ stream_map_config:
"customers": {
"email": null,
"email_domain": "owner_email.split('@')[-1]",
"email_hash": "md5(config['hash_seed'] + owner_email)"
"email_hash": "md5(config['hash_seed'] + owner_email)",
"phone_number": "fake.phone_number()"
}
},
"stream_map_config": {
"hash_seed": "01AWZh7A6DzGm6iJZZ2T"
},
"faker_config": {
"seed": 0,
"locale": [
"en_US",
"en_GB"
]
}
}
```
Expand Down Expand Up @@ -236,6 +242,11 @@ can be referenced directly by mapping expressions.
- `record` - an alias for the record values dictionary in the current stream.
- `_` - same as `record` but shorter to type
- `self` - the existing property value if the property already exists
- `fake` - a [`Faker`](https://faker.readthedocs.io/en/master/) instance, configurable via `faker_config` (see previous example) - see the built-in [standard providers](https://faker.readthedocs.io/en/master/providers.html) for available methods

```{tip}
The `fake` object is only available if the plugin specifies `faker` as an additional dependency (through the `singer-sdk` `faker` extra, or directly).
```

#### Automatic Schema Detection

Expand Down
1 change: 1 addition & 0 deletions e2e-tests/cookiecutters/mapper-base.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"mapper_id": "mapper-base",
"library_name": "mapper_base",
"variant": "None (Skip)",
"faker_extra": false,
"include_ci_files": "None (Skip)",
"license": "Apache-2.0",
"ide": "VSCode",
Expand Down
18 changes: 18 additions & 0 deletions e2e-tests/cookiecutters/tap-faker.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"cookiecutter": {
"source_name": "AutomaticTestTap",
"admin_name": "Automatic Tester",
"admin_email": "auto.tester@example.com",
"tap_id": "tap-faker",
"library_name": "tap_faker",
"variant": "None (Skip)",
"stream_type": "REST",
"auth_method": "Bearer Token",
"include_ci_files": "None (Skip)",
"faker_extra": true,
"license": "Apache-2.0",
"ide": "VSCode",
"_template": "../tap-template/",
"_output_dir": "."
}
}
1 change: 1 addition & 0 deletions e2e-tests/cookiecutters/tap-graphql-jwt.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"stream_type": "GraphQL",
"auth_method": "JWT",
"include_ci_files": "None (Skip)",
"faker_extra": false,
"license": "Apache-2.0",
"ide": "VSCode",
"_template": "../tap-template/",
Expand Down
1 change: 1 addition & 0 deletions e2e-tests/cookiecutters/tap-other-custom.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"stream_type": "Other",
"auth_method": "Custom or N/A",
"include_ci_files": "None (Skip)",
"faker_extra": false,
"license": "Apache-2.0",
"ide": "VSCode",
"_template": "../tap-template/",
Expand Down
1 change: 1 addition & 0 deletions e2e-tests/cookiecutters/tap-rest-api_key-github.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"stream_type": "REST",
"auth_method": "API Key",
"include_ci_files": "GitHub",
"faker_extra": false,
"license": "Apache-2.0",
"ide": "VSCode",
"_template": "../tap-template/",
Expand Down
1 change: 1 addition & 0 deletions e2e-tests/cookiecutters/tap-rest-basic_auth.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"stream_type": "REST",
"auth_method": "Basic Auth",
"include_ci_files": "None (Skip)",
"faker_extra": false,
"license": "Apache-2.0",
"ide": "VSCode",
"_template": "../tap-template/",
Expand Down
1 change: 1 addition & 0 deletions e2e-tests/cookiecutters/tap-rest-bearer_token.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"stream_type": "REST",
"auth_method": "Bearer Token",
"include_ci_files": "None (Skip)",
"faker_extra": false,
"license": "Apache-2.0",
"ide": "VSCode",
"_template": "../tap-template/",
Expand Down
1 change: 1 addition & 0 deletions e2e-tests/cookiecutters/tap-rest-custom.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"stream_type": "REST",
"auth_method": "Custom or N/A",
"include_ci_files": "None (Skip)",
"faker_extra": false,
"license": "Apache-2.0",
"ide": "VSCode",
"_template": "../tap-template/",
Expand Down
1 change: 1 addition & 0 deletions e2e-tests/cookiecutters/tap-rest-jwt.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"stream_type": "REST",
"auth_method": "JWT",
"include_ci_files": "None (Skip)",
"faker_extra": false,
"license": "Apache-2.0",
"ide": "VSCode",
"_template": "../tap-template/",
Expand Down
1 change: 1 addition & 0 deletions e2e-tests/cookiecutters/tap-rest-oauth2.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"stream_type": "REST",
"auth_method": "OAuth2",
"include_ci_files": "None (Skip)",
"faker_extra": false,
"license": "Apache-2.0",
"ide": "VSCode",
"_template": "../tap-template/",
Expand Down
1 change: 1 addition & 0 deletions e2e-tests/cookiecutters/tap-sql-custom.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"stream_type": "SQL",
"auth_method": "Custom or N/A",
"include_ci_files": "None (Skip)",
"faker_extra": false,
"license": "Apache-2.0",
"ide": "VSCode",
"_template": "../tap-template/",
Expand Down
1 change: 1 addition & 0 deletions e2e-tests/cookiecutters/target-per_record.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"variant": "None (Skip)",
"serialization_method": "Per record",
"include_ci_files": "None (Skip)",
"faker_extra": false,
"license": "Apache-2.0",
"ide": "VSCode",
"_template": "./sdk/cookiecutter/target-template",
Expand Down
1 change: 1 addition & 0 deletions e2e-tests/cookiecutters/target-sql.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"variant": "None (Skip)",
"serialization_method": "SQL",
"include_ci_files": "None (Skip)",
"faker_extra": false,
"license": "Apache-2.0",
"ide": "VSCode",
"_template": "./sdk/cookiecutter/target-template",
Expand Down
Loading

0 comments on commit a0e34c9

Please sign in to comment.