diff --git a/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java b/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java index b123a70e..2a821fb9 100644 --- a/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java +++ b/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java @@ -12,8 +12,9 @@ import liquibase.structure.DatabaseObject; import liquibase.structure.core.Table; -public class MissingTableChangeGeneratorDatabricks extends MissingTableChangeGenerator { +import java.util.regex.Pattern; +public class MissingTableChangeGeneratorDatabricks extends MissingTableChangeGenerator { @Override public int getPriority(Class objectType, Database database) { @@ -34,12 +35,14 @@ public Change[] fixMissing(DatabaseObject missingObject, DiffOutputControl contr ExtendedTableProperties extendedTableProperties = new ExtendedTableProperties( missingObject.getAttribute("Location", String.class), missingObject.getAttribute("tblProperties", String.class)); + String clusterColumns = missingObject.getAttribute("clusterColumns", ""); - changes[0] = getCreateTableChangeDatabricks(extendedTableProperties, changes); + changes[0] = getCreateTableChangeDatabricks(extendedTableProperties, changes, clusterColumns); return changes; } - private CreateTableChangeDatabricks getCreateTableChangeDatabricks(ExtendedTableProperties extendedTableProperties, Change[] changes) { + private CreateTableChangeDatabricks getCreateTableChangeDatabricks(ExtendedTableProperties extendedTableProperties, + Change[] changes, String clusterColumns) { CreateTableChange temp = (CreateTableChange) changes[0]; CreateTableChangeDatabricks createTableChangeDatabricks = new CreateTableChangeDatabricks(); createTableChangeDatabricks.setColumns(temp.getColumns()); @@ -51,6 +54,9 @@ private CreateTableChangeDatabricks getCreateTableChangeDatabricks(ExtendedTable createTableChangeDatabricks.setRemarks(temp.getRemarks()); createTableChangeDatabricks.setIfNotExists(temp.getIfNotExists()); createTableChangeDatabricks.setRowDependencies(temp.getRowDependencies()); + if (!clusterColumns.isEmpty()) { + createTableChangeDatabricks.setClusterColumns(sanitizeClusterColumns(clusterColumns)); + } createTableChangeDatabricks.setExtendedTableProperties(extendedTableProperties); return createTableChangeDatabricks; @@ -60,4 +66,9 @@ private CreateTableChangeDatabricks getCreateTableChangeDatabricks(ExtendedTable protected CreateTableChange createCreateTableChange() { return new CreateTableChangeDatabricks(); } -} + + private String sanitizeClusterColumns(String clusterColumnProperty) { + Pattern pattern = Pattern.compile("[\\[\\]\\\"]"); + return clusterColumnProperty.replaceAll(pattern.toString(), ""); + } +} \ No newline at end of file diff --git a/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java b/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java index d47260c1..ea90fd3f 100644 --- a/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java +++ b/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java @@ -13,12 +13,13 @@ import java.util.List; import java.util.Map; +import java.util.stream.Collectors; public class TableSnapshotGeneratorDatabricks extends TableSnapshotGenerator { private static final String LOCATION = "Location"; - private static final String TABLE_PROPERTIES = "Table Properties"; private static final String TBL_PROPERTIES = "tblProperties"; + private static final String CLUSTER_COLUMNS = "clusterColumns"; private static final String DETAILED_TABLE_INFORMATION_NODE = "# Detailed Table Information"; @Override @@ -49,13 +50,32 @@ protected DatabaseObject snapshotObject(DatabaseObject example, DatabaseSnapshot if (detailedInformationNode && tableProperty.get("COL_NAME").equals(LOCATION)) { table.setAttribute(LOCATION, tableProperty.get("DATA_TYPE")); } - if (detailedInformationNode && tableProperty.get("COL_NAME").equals(TABLE_PROPERTIES)) { - String tblProperties = (String) tableProperty.get("DATA_TYPE"); - table.setAttribute(TBL_PROPERTIES, tblProperties.substring(1, tblProperties.length() - 1));// remove starting and ending square brackets - } } + Map tblProperties = getTblPropertiesMap(database, example.getName()); + if (tblProperties.containsKey(CLUSTER_COLUMNS)) { + // used remove, as clusterColumns tblProperty is not allowed in create/alter table statements + table.setAttribute(CLUSTER_COLUMNS, tblProperties.remove(CLUSTER_COLUMNS)); + } + table.setAttribute(TBL_PROPERTIES, getTblPropertiesString(tblProperties)); } return table; } + private Map getTblPropertiesMap(Database database, String table) throws DatabaseException { + String query = String.format("SHOW TBLPROPERTIES %s.%s.%s;", database.getDefaultCatalogName(), database.getDefaultSchemaName(), table); + List> tablePropertiesResponse = Scope.getCurrentScope().getSingleton(ExecutorService.class) + .getExecutor("jdbc", database).queryForList(new RawParameterizedSqlStatement(query)); + return tablePropertiesResponse.stream() + .collect(Collectors.toMap(mapElement -> (String) mapElement.get("KEY"), mapElement -> (String) mapElement.get("VALUE"))); + } + + private String getTblPropertiesString(Map propertiesMap) { + StringBuilder csvString = new StringBuilder(); + propertiesMap.entrySet() + .stream() + .sorted(Map.Entry.comparingByKey()) + .forEach(entry -> csvString.append(entry.getKey()).append("=").append(entry.getValue()).append(",")); + return csvString.toString().replaceAll(",$", ""); + } + } \ No newline at end of file diff --git a/src/main/java/liquibase/ext/databricks/sqlgenerator/CreateTableGeneratorDatabricks.java b/src/main/java/liquibase/ext/databricks/sqlgenerator/CreateTableGeneratorDatabricks.java index 01998ece..6ed2dcdc 100644 --- a/src/main/java/liquibase/ext/databricks/sqlgenerator/CreateTableGeneratorDatabricks.java +++ b/src/main/java/liquibase/ext/databricks/sqlgenerator/CreateTableGeneratorDatabricks.java @@ -17,6 +17,8 @@ public class CreateTableGeneratorDatabricks extends CreateTableGenerator { + private static final String CLUSTERING_INFORMATION_TBL_PROPERTY_START = "clusteringColumns=[["; + @Override public int getPriority() { @@ -49,7 +51,7 @@ public Sql[] generateSql(CreateTableStatement statement, Database database, SqlG if ((!StringUtils.isEmpty(thisStatement.getTableFormat()))) { finalsql.append(" USING ").append(thisStatement.getTableFormat()); } else if (thisStatement.getExtendedTableProperties() != null && StringUtils.isNoneEmpty(thisStatement.getExtendedTableProperties().getTblProperties())) { - finalsql.append(" TBLPROPERTIES (").append(thisStatement.getExtendedTableProperties().getTblProperties()).append(")"); + finalsql.append(" TBLPROPERTIES (").append(avoidClusterProperties(thisStatement)).append(")"); } else { finalsql.append(" USING delta TBLPROPERTIES('delta.feature.allowColumnDefaults' = 'supported', 'delta.columnMapping.mode' = 'name', 'delta.enableDeletionVectors' = true)"); } @@ -109,4 +111,23 @@ public Sql[] generateSql(CreateTableStatement statement, Database database, SqlG } + /** + * While we are passing TBLPROPERTIES as raw string into create table statement, especially in cases of + * changelog generation we need to sanitize them from 'clusteringColumns' property, otherwise generated changelog + * will fail to execute. + * Parsing of tblProperties map as an actual Map structured collection should make this approach safer and easier. + * @param statement CreateTableStatementDatabricks containing tblProperties raw string + * @return tblProperties string without 'clusteringColumns' property if it was present, otherwise untouched + * tblProperties raw string. + * */ + private String avoidClusterProperties(CreateTableStatementDatabricks statement) { + String tblProperties = statement.getExtendedTableProperties().getTblProperties(); + if(tblProperties.contains(CLUSTERING_INFORMATION_TBL_PROPERTY_START)) { + int clusterColumnsStartIndex = tblProperties.indexOf(CLUSTERING_INFORMATION_TBL_PROPERTY_START); + String replaceString = tblProperties.substring(clusterColumnsStartIndex, tblProperties.indexOf("\"]],", clusterColumnsStartIndex) + 4); + return tblProperties.replace(replaceString, ""); + } + return tblProperties; + } + } diff --git a/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.json b/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.json index 87d853ee..6b822b60 100644 --- a/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.json +++ b/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.json @@ -20,9 +20,15 @@ "name": "test_new", "type": "int" } + }, + { + "column": { + "name": "test_present_new", + "type": "int" + } } ], - "clusterColumns": "test_id,test_new" + "clusterColumns": "test_id,test_new,test_present_new" } } ], @@ -48,6 +54,11 @@ "column": { "name": "test_id" } + }, + { + "column": { + "name": "test_present_new`" + } } ] } diff --git a/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.xml b/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.xml index 69d0008c..d5eb617f 100644 --- a/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.xml +++ b/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.xml @@ -13,7 +13,8 @@ - test_id,test_new + + test_id,test_new,test_present_new @@ -23,6 +24,7 @@ + diff --git a/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.yaml b/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.yaml index 714e4493..f719fbc4 100644 --- a/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.yaml +++ b/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.yaml @@ -12,7 +12,10 @@ databaseChangeLog: - column: name: test_new type: int - clusterColumns: test_id, test_new + - column: + name: test_present_new + type: int + clusterColumns: test_id, test_new, test_present_new rollback: dropTable: tableName: test_table_clustered_new @@ -25,6 +28,8 @@ databaseChangeLog: columns: - column: name: test_id + - column: + name: test_present_new rollback: empty - changeSet: diff --git a/src/test/resources/liquibase/harness/change/expectedSnapshot/databricks/createClusteredTableNew.json b/src/test/resources/liquibase/harness/change/expectedSnapshot/databricks/createClusteredTableNew.json index 7a73a41b..fecd6ded 100644 --- a/src/test/resources/liquibase/harness/change/expectedSnapshot/databricks/createClusteredTableNew.json +++ b/src/test/resources/liquibase/harness/change/expectedSnapshot/databricks/createClusteredTableNew.json @@ -1,2 +1,25 @@ { + "snapshot": { + "objects": { + "liquibase.structure.core.Table": [ + { + "table": { + "name": "test_table_clustered_new" + } + } + ], + "liquibase.structure.core.Column": [ + { + "column": { + "name": "test_id" + } + }, + { + "column": { + "name": "test_present_new" + } + } + ] + } + } } \ No newline at end of file diff --git a/src/test/resources/liquibase/harness/change/expectedSql/databricks/createClusteredTableNew.sql b/src/test/resources/liquibase/harness/change/expectedSql/databricks/createClusteredTableNew.sql index 898576e5..c1737f53 100644 --- a/src/test/resources/liquibase/harness/change/expectedSql/databricks/createClusteredTableNew.sql +++ b/src/test/resources/liquibase/harness/change/expectedSql/databricks/createClusteredTableNew.sql @@ -1,3 +1,3 @@ -CREATE TABLE main.liquibase_harness_test_ds.test_table_clustered_new (test_id INT, test_new INT) USING delta TBLPROPERTIES('delta.feature.allowColumnDefaults' = 'supported', 'delta.columnMapping.mode' = 'name', 'delta.enableDeletionVectors' = true) CLUSTER BY (test_id, test_new) -ALTER TABLE main.liquibase_harness_test_ds.test_table_clustered_new CLUSTER BY (test_id) +CREATE TABLE main.liquibase_harness_test_ds.test_table_clustered_new (test_id INT, test_new INT, test_present_new INT) USING delta TBLPROPERTIES('delta.feature.allowColumnDefaults' = 'supported', 'delta.columnMapping.mode' = 'name', 'delta.enableDeletionVectors' = true) CLUSTER BY (test_id, test_new, test_present_new) +ALTER TABLE main.liquibase_harness_test_ds.test_table_clustered_new CLUSTER BY (test_id,test_present_new) ALTER TABLE main.liquibase_harness_test_ds.test_table_clustered_new DROP COLUMN test_new \ No newline at end of file