diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 35c5777..44efec6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -37,16 +37,4 @@ jobs: uses: pypa/gh-action-pypi-publish@release/v1 with: user: __token__ - password: ${{ secrets.LABS_PYPI_TOKEN }} - - - name: Create Release - uses: actions/create-release@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions - with: - tag_name: ${{ github.ref }} - release_name: Release ${{ github.ref }} - body: | - Release for version ${{ github.ref }}. Please refer to CHANGELOG.md for detailed information. - draft: false - prerelease: false \ No newline at end of file + password: ${{ secrets.LABS_PYPI_TOKEN }} \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index d7e64ed..06989e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 **NOTE:** For CLI interfaces, we support SemVer approach. However, for API components we don't use SemVer as of now. This may lead to instability when using dbx API methods directly. [Please read through the Keep a Changelog (~5min)](https://keepachangelog.com/en/1.0.0/). 
+ +## [v0.0.3] - 2023-06-07 +### Fixed +- Infer the data types of the __START_AT and __END_AT columns from the sequence_by column in the apply changes API +### Changed +- Bumped the package version in setup.py +### Removed +- The GitHub release-creation step from the GitHub Actions release workflow + ## [v0.0.2] - 2023-05-11 ### Added - Table properties support for bronze, quarantine and silver tables using create_streaming_live_table api call diff --git a/setup.py b/setup.py index 76e8111..e5bf0f1 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ """ setup( name="dlt_meta", - version="0.0.2", + version="0.0.2.3", python_requires=">=3.8", setup_requires=["wheel>=0.37.1,<=0.40.0"], install_requires=INSTALL_REQUIRES, diff --git a/src/dataflow_pipeline.py b/src/dataflow_pipeline.py index 36f5d55..e8d8931 100644 --- a/src/dataflow_pipeline.py +++ b/src/dataflow_pipeline.py @@ -4,7 +4,7 @@ import dlt from pyspark.sql import DataFrame from pyspark.sql.functions import expr -from pyspark.sql.types import IntegerType, StructType, StructField +from pyspark.sql.types import StructType, StructField from src.dataflow_spec import BronzeDataflowSpec, SilverDataflowSpec, DataflowSpecUtils from src.pipeline_readers import PipelineReaders @@ -253,16 +253,20 @@ def cdc_apply_changes(self): else self.silver_schema ) + sequenced_by_data_type = None + if cdc_apply_changes.except_column_list: modified_schema = StructType([]) for field in struct_schema.fields: if field.name not in cdc_apply_changes.except_column_list: modified_schema.add(field) + if field.name == cdc_apply_changes.sequence_by: + sequenced_by_data_type = field.dataType struct_schema = modified_schema if cdc_apply_changes.scd_type == "2": - struct_schema.add(StructField("__START_AT", IntegerType())) - struct_schema.add(StructField("__END_AT", IntegerType())) + struct_schema.add(StructField("__START_AT", sequenced_by_data_type)) + struct_schema.add(StructField("__END_AT", sequenced_by_data_type)) dlt.create_streaming_live_table( name=f"{self.dataflowSpec.targetDetails['table']}",