Merge pull request #5 from databrickslabs/issue/apply_changes_infer_version_col_datatype

- Fixed: infer datatypes from sequence_by to __START_AT, __END_AT for apply changes API
- Changed: setup.py for version
- Removed: Git release tag from GitHub Actions
ravi-databricks authored Jun 22, 2023
2 parents 84945c1 + 4cbf6fb commit 38e08f0
Showing 4 changed files with 18 additions and 17 deletions.
14 changes: 1 addition & 13 deletions .github/workflows/release.yml

```diff
@@ -37,16 +37,4 @@ jobs:
       uses: pypa/gh-action-pypi-publish@release/v1
       with:
         user: __token__
+        password: ${{ secrets.LABS_PYPI_TOKEN }}
-
-    - name: Create Release
-      uses: actions/create-release@v1
-      env:
-        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions
-      with:
-        tag_name: ${{ github.ref }}
-        release_name: Release ${{ github.ref }}
-        body: |
-          Release for version ${{ github.ref }}. Please refer to CHANGELOG.md for detailed information.
-        draft: false
-        prerelease: false
-        password: ${{ secrets.LABS_PYPI_TOKEN }}
```
9 changes: 9 additions & 0 deletions CHANGELOG.md

```diff
@@ -8,6 +8,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 **NOTE:** For CLI interfaces, we support SemVer approach. However, for API components we don't use SemVer as of now. This may lead to instability when using dbx API methods directly.
 
 [Please read through the Keep a Changelog (~5min)](https://keepachangelog.com/en/1.0.0/).
+
+## [v0.0.3] - 2023-06-07
+### Fixed
+- infer datatypes from sequence_by to __START_AT, __END_AT for apply changes API
+### Changed
+- setup.py for version
+### Removed
+- Git release tag from github actions
+
 ## [v0.0.2] - 2023-05-11
 ### Added
 - Table properties support for bronze, quarantine and silver tables using create_streaming_live_table api call
```
2 changes: 1 addition & 1 deletion setup.py

```diff
@@ -19,7 +19,7 @@
 """
 setup(
     name="dlt_meta",
-    version="0.0.2",
+    version="0.0.2.3",
     python_requires=">=3.8",
     setup_requires=["wheel>=0.37.1,<=0.40.0"],
     install_requires=INSTALL_REQUIRES,
```
10 changes: 7 additions & 3 deletions src/dataflow_pipeline.py

```diff
@@ -4,7 +4,7 @@
 import dlt
 from pyspark.sql import DataFrame
 from pyspark.sql.functions import expr
-from pyspark.sql.types import IntegerType, StructType, StructField
+from pyspark.sql.types import StructType, StructField
 
 from src.dataflow_spec import BronzeDataflowSpec, SilverDataflowSpec, DataflowSpecUtils
 from src.pipeline_readers import PipelineReaders
@@ -253,16 +253,20 @@ def cdc_apply_changes(self):
             else self.silver_schema
         )
 
+        sequenced_by_data_type = None
+
         if cdc_apply_changes.except_column_list:
             modified_schema = StructType([])
             for field in struct_schema.fields:
                 if field.name not in cdc_apply_changes.except_column_list:
                     modified_schema.add(field)
+                if field.name == cdc_apply_changes.sequence_by:
+                    sequenced_by_data_type = field.dataType
             struct_schema = modified_schema
 
         if cdc_apply_changes.scd_type == "2":
-            struct_schema.add(StructField("__START_AT", IntegerType()))
-            struct_schema.add(StructField("__END_AT", IntegerType()))
+            struct_schema.add(StructField("__START_AT", sequenced_by_data_type))
+            struct_schema.add(StructField("__END_AT", sequenced_by_data_type))
 
         dlt.create_streaming_live_table(
             name=f"{self.dataflowSpec.targetDetails['table']}",
```
