From ed2f55fef53920a8a406bbc043c25073a75072f8 Mon Sep 17 00:00:00 2001 From: KushnirykOleh Date: Thu, 26 Sep 2024 16:17:57 +0300 Subject: [PATCH 01/11] wip --- .../ColumnSnapshotGeneratorDatabricks.java | 45 +++++++++++---- .../jvm/TableSnapshotGeneratorDatabricks.java | 57 +++++++++++++++++++ .../liquibase.snapshot.SnapshotGenerator | 1 + .../expectedSql/databricks/createIndex.sql | 3 +- 4 files changed, 93 insertions(+), 13 deletions(-) create mode 100644 src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java diff --git a/src/main/java/liquibase/ext/databricks/snapshot/jvm/ColumnSnapshotGeneratorDatabricks.java b/src/main/java/liquibase/ext/databricks/snapshot/jvm/ColumnSnapshotGeneratorDatabricks.java index e886c477..1b1f4ce2 100644 --- a/src/main/java/liquibase/ext/databricks/snapshot/jvm/ColumnSnapshotGeneratorDatabricks.java +++ b/src/main/java/liquibase/ext/databricks/snapshot/jvm/ColumnSnapshotGeneratorDatabricks.java @@ -1,29 +1,28 @@ package liquibase.ext.databricks.snapshot.jvm; +import liquibase.Scope; import liquibase.database.Database; import liquibase.exception.DatabaseException; +import liquibase.executor.ExecutorService; import liquibase.ext.databricks.database.DatabricksDatabase; import liquibase.snapshot.CachedRow; -import liquibase.snapshot.SnapshotGenerator; +import liquibase.snapshot.DatabaseSnapshot; import liquibase.snapshot.jvm.ColumnSnapshotGenerator; +import liquibase.statement.core.RawParameterizedSqlStatement; import liquibase.structure.DatabaseObject; import liquibase.structure.core.Column; import liquibase.structure.core.DataType; +import java.util.List; +import java.util.Map; public class ColumnSnapshotGeneratorDatabricks extends ColumnSnapshotGenerator { @Override public int getPriority(Class objectType, Database database) { - if (database instanceof DatabricksDatabase) { - return super.getPriority(objectType, database) + PRIORITY_DATABASE; - } else { - return PRIORITY_NONE; - } - } + if (database instanceof DatabricksDatabase) + return PRIORITY_DATABASE; + return PRIORITY_NONE; - @Override - public Class[] replaces() { - return new Class[] { ColumnSnapshotGenerator.class }; } /** @@ -43,4 +42,28 @@ protected DataType readDataType(CachedRow columnMetadataResultSet, Column column } return super.readDataType(columnMetadataResultSet, column, database); } -} + + + @Override + protected DatabaseObject snapshotObject(DatabaseObject example, DatabaseSnapshot snapshot) throws DatabaseException { + if (example instanceof Column) { + Column column = (Column) super.snapshotObject(example, snapshot); + Database database = snapshot.getDatabase(); + + String query = String.format("SELECT column_default from %s.COLUMNS where table_name = '%s' AND table_schema='%s' AND column_name ='%s';", + database.getSystemSchema(), + column.getRelation().getName(), + column.getRelation().getSchema().getName(), + column.getName()); + List> tablePropertiesResponse = Scope.getCurrentScope().getSingleton(ExecutorService.class) + .getExecutor("jdbc", database).queryForList(new RawParameterizedSqlStatement(query)); + for (Map tableProperty : tablePropertiesResponse) { + column.setDefaultValue(tableProperty.get("COLUMN_DEFAULT")); + } + return column; + } else { + return example; + } + } + +} \ No newline at end of file diff --git a/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java b/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java new file mode 100644 index 00000000..4080499a --- /dev/null +++ b/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java @@ -0,0 +1,57 @@ +package liquibase.ext.databricks.snapshot.jvm; + +import liquibase.Scope; +import liquibase.database.Database; +import liquibase.exception.DatabaseException; +import liquibase.executor.ExecutorService; +import liquibase.ext.databricks.database.DatabricksDatabase; +import liquibase.snapshot.DatabaseSnapshot; +import liquibase.snapshot.jvm.TableSnapshotGenerator; +import liquibase.statement.core.RawParameterizedSqlStatement; +import liquibase.structure.DatabaseObject; +import liquibase.structure.core.Table; + +import java.util.List; +import java.util.Map; + +/** + * Created by vesterma on 06/02/14. + */ +public class TableSnapshotGeneratorDatabricks extends TableSnapshotGenerator { + + @Override + public int getPriority(Class objectType, Database database) { + if (database instanceof DatabricksDatabase) + return PRIORITY_DATABASE; + return PRIORITY_NONE; + + } + + @Override + protected DatabaseObject snapshotObject(DatabaseObject example, DatabaseSnapshot snapshot) throws DatabaseException { + Table table = (Table) super.snapshotObject(example, snapshot); + Database database = snapshot.getDatabase(); + + String query = String.format("DESCRIBE TABLE EXTENDED %s.%s.%s;", database.getDefaultCatalogName(), database.getDefaultSchemaName(), example.getName()); + List> tablePropertiesResponse = Scope.getCurrentScope().getSingleton(ExecutorService.class) + .getExecutor("jdbc", database).queryForList(new RawParameterizedSqlStatement(query)); + for (Map tableProperty : tablePropertiesResponse) { + table.setAttribute((String) tableProperty.get("KEY"), tableProperty.get("VALUE")); + } + return table; + +// String query = String.format("SHOW TBLPROPERTIES %s.%s.%s;", database.getDefaultCatalogName(), database.getDefaultSchemaName(), example.getName()); +// List> tablePropertiesResponse = Scope.getCurrentScope().getSingleton(ExecutorService.class) +// .getExecutor("jdbc", database).queryForList(new RawParameterizedSqlStatement(query)); +// for (Map tableProperty : tablePropertiesResponse) { +// table.setAttribute((String) tableProperty.get("KEY"), tableProperty.get("VALUE")); +// } +// return table; + } + +// @Override +// public Class[] replaces() { +// return new Class[]{TableSnapshotGenerator.class}; +// } + +} \ No newline at end of file diff --git a/src/main/resources/META-INF/services/liquibase.snapshot.SnapshotGenerator b/src/main/resources/META-INF/services/liquibase.snapshot.SnapshotGenerator index e92aa426..9a020738 100644 --- a/src/main/resources/META-INF/services/liquibase.snapshot.SnapshotGenerator +++ b/src/main/resources/META-INF/services/liquibase.snapshot.SnapshotGenerator @@ -5,4 +5,5 @@ liquibase.ext.databricks.snapshot.jvm.UniqueConstraintSnapshotGeneratorDatabrick liquibase.ext.databricks.snapshot.jvm.IndexSnapshotGeneratorDatabricks liquibase.ext.databricks.snapshot.jvm.ViewSnapshotGeneratorDatabricks liquibase.ext.databricks.snapshot.jvm.ColumnSnapshotGeneratorDatabricks +liquibase.ext.databricks.snapshot.jvm.TableSnapshotGeneratorDatabricks diff --git a/src/test/resources/liquibase/harness/change/expectedSql/databricks/createIndex.sql b/src/test/resources/liquibase/harness/change/expectedSql/databricks/createIndex.sql index 6ade02aa..159f4c3e 100644 --- a/src/test/resources/liquibase/harness/change/expectedSql/databricks/createIndex.sql +++ b/src/test/resources/liquibase/harness/change/expectedSql/databricks/createIndex.sql @@ -1,2 +1 @@ -INVALID TEST --- Databricks will support index via the CLUSTER BY commands, but the createTable change type needs to be altered first \ No newline at end of file +INVALID TEST -- Databricks will support index via the CLUSTER BY commands, but the createTable change type needs to be altered first \ No newline at end of file From 74850a858902f3c81623bcb047fe3740e04d06b4 Mon Sep 17 00:00:00 2001 From: KushnirykOleh Date: Fri, 4 Oct 2024 16:27:02 +0300 Subject: [PATCH 02/11] reverted ColumnSnapshotGeneratorDatabricks, it went to DAT-18790 --- .../ColumnSnapshotGeneratorDatabricks.java | 45 +++++-------------- 1 file changed, 11 insertions(+), 34 deletions(-) diff --git a/src/main/java/liquibase/ext/databricks/snapshot/jvm/ColumnSnapshotGeneratorDatabricks.java b/src/main/java/liquibase/ext/databricks/snapshot/jvm/ColumnSnapshotGeneratorDatabricks.java index 1b1f4ce2..e886c477 100644 --- a/src/main/java/liquibase/ext/databricks/snapshot/jvm/ColumnSnapshotGeneratorDatabricks.java +++ b/src/main/java/liquibase/ext/databricks/snapshot/jvm/ColumnSnapshotGeneratorDatabricks.java @@ -1,28 +1,29 @@ package liquibase.ext.databricks.snapshot.jvm; -import liquibase.Scope; import liquibase.database.Database; import liquibase.exception.DatabaseException; -import liquibase.executor.ExecutorService; import liquibase.ext.databricks.database.DatabricksDatabase; import liquibase.snapshot.CachedRow; -import liquibase.snapshot.DatabaseSnapshot; +import liquibase.snapshot.SnapshotGenerator; import liquibase.snapshot.jvm.ColumnSnapshotGenerator; -import liquibase.statement.core.RawParameterizedSqlStatement; import liquibase.structure.DatabaseObject; import liquibase.structure.core.Column; import liquibase.structure.core.DataType; -import java.util.List; -import java.util.Map; public class ColumnSnapshotGeneratorDatabricks extends ColumnSnapshotGenerator { @Override public int getPriority(Class objectType, Database database) { - if (database instanceof DatabricksDatabase) - return PRIORITY_DATABASE; - return PRIORITY_NONE; + if (database instanceof DatabricksDatabase) { + return super.getPriority(objectType, database) + PRIORITY_DATABASE; + } else { + return PRIORITY_NONE; + } + } + @Override + public Class[] replaces() { + return new Class[] { ColumnSnapshotGenerator.class }; } /** @@ -42,28 +43,4 @@ protected DataType readDataType(CachedRow columnMetadataResultSet, Column column } return super.readDataType(columnMetadataResultSet, column, database); } - - - @Override - protected DatabaseObject snapshotObject(DatabaseObject example, DatabaseSnapshot snapshot) throws DatabaseException { - if (example instanceof Column) { - Column column = (Column) super.snapshotObject(example, snapshot); - Database database = snapshot.getDatabase(); - - String query = String.format("SELECT column_default from %s.COLUMNS where table_name = '%s' AND table_schema='%s' AND column_name ='%s';", - database.getSystemSchema(), - column.getRelation().getName(), - column.getRelation().getSchema().getName(), - column.getName()); - List> tablePropertiesResponse = Scope.getCurrentScope().getSingleton(ExecutorService.class) - .getExecutor("jdbc", database).queryForList(new RawParameterizedSqlStatement(query)); - for (Map tableProperty : tablePropertiesResponse) { - column.setDefaultValue(tableProperty.get("COLUMN_DEFAULT")); - } - return column; - } else { - return example; - } - } - -} \ No newline at end of file +} From c758cf3788e70c105f550d6e13a0546f8ced1c9b Mon Sep 17 00:00:00 2001 From: KushnirykOleh Date: Mon, 7 Oct 2024 16:04:28 +0300 Subject: [PATCH 03/11] wip --- .../jvm/TableSnapshotGeneratorDatabricks.java | 45 ++++++++++++++++++- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java b/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java index 4080499a..02a56f29 100644 --- a/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java +++ b/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java @@ -19,6 +19,10 @@ */ public class TableSnapshotGeneratorDatabricks extends TableSnapshotGenerator { + private final static String LOCATION = "Location"; + private final static String TABLE_PROPERTIES = "Table Properties"; + private final static String DETAILED_TABLE_INFORMATION_NODE = "# Detailed Table Information"; + @Override public int getPriority(Class objectType, Database database) { if (database instanceof DatabricksDatabase) @@ -35,16 +39,53 @@ protected DatabaseObject snapshotObject(DatabaseObject example, DatabaseSnapshot String query = String.format("DESCRIBE TABLE EXTENDED %s.%s.%s;", database.getDefaultCatalogName(), database.getDefaultSchemaName(), example.getName()); List> tablePropertiesResponse = Scope.getCurrentScope().getSingleton(ExecutorService.class) .getExecutor("jdbc", database).queryForList(new RawParameterizedSqlStatement(query)); + // DESCRIBE TABLE EXTENDED returns both columns and additional information. + // We need to make sure "Location" is not column in the table, but table location in s3 + boolean detailedInformationNode = false; for (Map tableProperty : tablePropertiesResponse) { - table.setAttribute((String) tableProperty.get("KEY"), tableProperty.get("VALUE")); + if (tableProperty.get("COL_NAME").equals(DETAILED_TABLE_INFORMATION_NODE)) { + detailedInformationNode = true; + continue; + } + if (detailedInformationNode && tableProperty.get("COL_NAME").equals(LOCATION)) { + table.setAttribute(LOCATION, tableProperty.get("DATA_TYPE")); + } + if (detailedInformationNode && tableProperty.get("COL_NAME").equals(TABLE_PROPERTIES)) { + //TODO should i parse and split TableProperties or keep them all together? + table.setAttribute(TABLE_PROPERTIES, tableProperty.get("DATA_TYPE")); + } + //TODO i can get default value for column here `# Column Default Values` ->"COL_NAME" -> "title", "DATA_TYPE" -> "string", "COMMENT" -> + // "'title_test'" --actual default value for the column is in comment column of this resultSet } return table; + +// String query = String.format("SHOW TBLPROPERTIES %s.%s.%s;", database.getDefaultCatalogName(), database.getDefaultSchemaName(), example.getName()); + +// String query = String.format("SHOW TABLE EXTENDED IN %s.%s LIKE '%s';", database.getDefaultCatalogName(), database.getDefaultSchemaName(), +// example.getName()); +// List> tablePropertiesResponse = Scope.getCurrentScope().getSingleton(ExecutorService.class) +// .getExecutor("jdbc", database).queryForList(new RawParameterizedSqlStatement(query)); +// for (Map tableProperty : tablePropertiesResponse) { +// String[] tableParts = ((String) tableProperty.get("INFORMATION")).split("\\r?\\n"); +// for (String tablePart : tableParts) { +// if (tablePart.startsWith("Location:")) { +// table.setAttribute("Location", tablePart.replace("Location: ", "")); +// } +// if (tablePart.startsWith("Table Properties:")) { +// table.setAttribute("Table Properties", tablePart.replace("Table Properties: [", "").replace("]", "")); +// } +// } +// } + // String query = String.format("SHOW TBLPROPERTIES %s.%s.%s;", database.getDefaultCatalogName(), database.getDefaultSchemaName(), example.getName()); // List> tablePropertiesResponse = Scope.getCurrentScope().getSingleton(ExecutorService.class) // .getExecutor("jdbc", database).queryForList(new RawParameterizedSqlStatement(query)); // for (Map tableProperty : tablePropertiesResponse) { -// table.setAttribute((String) tableProperty.get("KEY"), tableProperty.get("VALUE")); +// //TODO combine into +// // "'key0'='value0', 'key1'='value1', .... 'keyN'='valueN'" +// // csv string +// // } // return table; } From 3e29705f51997083cea96b569d68895d2c70f925 Mon Sep 17 00:00:00 2001 From: KushnirykOleh Date: Tue, 8 Oct 2024 17:56:48 +0300 Subject: [PATCH 04/11] added missingTable generator --- .../createTable/ExtendedTableProperties.java | 26 +++----- ...MissingTableChangeGeneratorDatabricks.java | 62 +++++++++++++++++++ .../jvm/TableSnapshotGeneratorDatabricks.java | 44 +------------ ...base.diff.output.changelog.ChangeGenerator | 1 + 4 files changed, 75 insertions(+), 58 deletions(-) create mode 100644 src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java diff --git a/src/main/java/liquibase/ext/databricks/change/createTable/ExtendedTableProperties.java b/src/main/java/liquibase/ext/databricks/change/createTable/ExtendedTableProperties.java index 3d3a681a..cf826f25 100644 --- a/src/main/java/liquibase/ext/databricks/change/createTable/ExtendedTableProperties.java +++ b/src/main/java/liquibase/ext/databricks/change/createTable/ExtendedTableProperties.java @@ -1,7 +1,15 @@ package liquibase.ext.databricks.change.createTable; import liquibase.serializer.AbstractLiquibaseSerializable; - +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + +@NoArgsConstructor +@AllArgsConstructor +@Setter +@Getter public class ExtendedTableProperties extends AbstractLiquibaseSerializable{ private String tableLocation; private String tblProperties; @@ -15,20 +23,4 @@ public String getSerializedObjectName() { public String getSerializedObjectNamespace() { return "http://www.liquibase.org/xml/ns/databricks"; } - - public String getTableLocation() { - return tableLocation; - } - - public void setTableLocation(String tableLocation) { - this.tableLocation = tableLocation; - } - - public String getTblProperties() { - return tblProperties; - } - - public void setTblProperties(String tblProperties) { - this.tblProperties = tblProperties; - } } diff --git a/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java b/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java new file mode 100644 index 00000000..519f8ccc --- /dev/null +++ b/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java @@ -0,0 +1,62 @@ +package liquibase.ext.databricks.diff.output.changelog; + +import liquibase.change.Change; +import liquibase.change.core.CreateTableChange; +import liquibase.database.Database; +import liquibase.diff.output.DiffOutputControl; +import liquibase.diff.output.changelog.ChangeGeneratorChain; +import liquibase.diff.output.changelog.core.MissingTableChangeGenerator; +import liquibase.ext.databricks.change.createTable.CreateTableChangeDatabricks; +import liquibase.ext.databricks.change.createTable.ExtendedTableProperties; +import liquibase.ext.databricks.database.DatabricksDatabase; +import liquibase.structure.DatabaseObject; +import liquibase.structure.core.Table; + +public class MissingTableChangeGeneratorDatabricks extends MissingTableChangeGenerator { + + + @Override + public int getPriority(Class objectType, Database database) { + if (database instanceof DatabricksDatabase && Table.class.isAssignableFrom(objectType)) { + return PRIORITY_DATABASE; + } else { + return PRIORITY_NONE; + } + } + + @Override + public Change[] fixMissing(DatabaseObject missingObject, DiffOutputControl control, Database referenceDatabase, Database comparisonDatabase, + ChangeGeneratorChain chain) { + Change[] changes = super.fixMissing(missingObject, control, referenceDatabase, comparisonDatabase, chain); + if (changes == null || changes.length == 0) { + return changes; + } + ExtendedTableProperties extendedTableProperties = new ExtendedTableProperties( + missingObject.getAttribute("Location", String.class), + missingObject.getAttribute("tblProperties", String.class)); + + changes[0] = getCreateTableChangeDatabricks(extendedTableProperties, changes); + return changes; + } + + private CreateTableChangeDatabricks getCreateTableChangeDatabricks(ExtendedTableProperties extendedTableProperties, Change[] changes) { + CreateTableChange temp = (CreateTableChange) changes[0]; + CreateTableChangeDatabricks createTableChangeDatabricks = new CreateTableChangeDatabricks(); + createTableChangeDatabricks.setColumns(temp.getColumns()); + createTableChangeDatabricks.setTableType(temp.getTableType()); + createTableChangeDatabricks.setCatalogName(temp.getCatalogName()); + createTableChangeDatabricks.setSchemaName(temp.getSchemaName()); + createTableChangeDatabricks.setTableName(temp.getTableName()); + createTableChangeDatabricks.setTablespace(temp.getTablespace()); + createTableChangeDatabricks.setRemarks(temp.getRemarks()); + createTableChangeDatabricks.setIfNotExists(temp.getIfNotExists()); + createTableChangeDatabricks.setRowDependencies(temp.getRowDependencies()); + + createTableChangeDatabricks.setExtendedTableProperties(extendedTableProperties); + return createTableChangeDatabricks; + } + + protected CreateTableChange createCreateTableChange() { + return new CreateTableChangeDatabricks(); + } +} diff --git a/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java b/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java index 02a56f29..b6ae9002 100644 --- a/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java +++ b/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java @@ -14,13 +14,11 @@ import java.util.List; import java.util.Map; -/** - * Created by vesterma on 06/02/14. - */ public class TableSnapshotGeneratorDatabricks extends TableSnapshotGenerator { private final static String LOCATION = "Location"; private final static String TABLE_PROPERTIES = "Table Properties"; + private final static String TBL_PROPERTIES = "tblProperties"; private final static String DETAILED_TABLE_INFORMATION_NODE = "# Detailed Table Information"; @Override @@ -51,48 +49,12 @@ protected DatabaseObject snapshotObject(DatabaseObject example, DatabaseSnapshot table.setAttribute(LOCATION, tableProperty.get("DATA_TYPE")); } if (detailedInformationNode && tableProperty.get("COL_NAME").equals(TABLE_PROPERTIES)) { - //TODO should i parse and split TableProperties or keep them all together? - table.setAttribute(TABLE_PROPERTIES, tableProperty.get("DATA_TYPE")); + String tblProperties = (String) tableProperty.get("DATA_TYPE"); + table.setAttribute(TBL_PROPERTIES, tblProperties.substring(1, tblProperties.length() - 1));// remove starting and ending square brackets } - //TODO i can get default value for column here `# Column Default Values` ->"COL_NAME" -> "title", "DATA_TYPE" -> "string", "COMMENT" -> - // "'title_test'" --actual default value for the column is in comment column of this resultSet } return table; - -// String query = String.format("SHOW TBLPROPERTIES %s.%s.%s;", database.getDefaultCatalogName(), database.getDefaultSchemaName(), example.getName()); - -// String query = String.format("SHOW TABLE EXTENDED IN %s.%s LIKE '%s';", database.getDefaultCatalogName(), database.getDefaultSchemaName(), -// example.getName()); -// List> tablePropertiesResponse = Scope.getCurrentScope().getSingleton(ExecutorService.class) -// .getExecutor("jdbc", database).queryForList(new RawParameterizedSqlStatement(query)); -// for (Map tableProperty : tablePropertiesResponse) { -// String[] tableParts = ((String) tableProperty.get("INFORMATION")).split("\\r?\\n"); -// for (String tablePart : tableParts) { -// if (tablePart.startsWith("Location:")) { -// table.setAttribute("Location", tablePart.replace("Location: ", "")); -// } -// if (tablePart.startsWith("Table Properties:")) { -// table.setAttribute("Table Properties", tablePart.replace("Table Properties: [", "").replace("]", "")); -// } -// } -// } - -// String query = String.format("SHOW TBLPROPERTIES %s.%s.%s;", database.getDefaultCatalogName(), database.getDefaultSchemaName(), example.getName()); -// List> tablePropertiesResponse = Scope.getCurrentScope().getSingleton(ExecutorService.class) -// .getExecutor("jdbc", database).queryForList(new RawParameterizedSqlStatement(query)); -// for (Map tableProperty : tablePropertiesResponse) { -// //TODO combine into -// // "'key0'='value0', 'key1'='value1', .... 'keyN'='valueN'" -// // csv string -// -// } -// return table; } -// @Override -// public Class[] replaces() { -// return new Class[]{TableSnapshotGenerator.class}; -// } - } \ No newline at end of file diff --git a/src/main/resources/META-INF/services/liquibase.diff.output.changelog.ChangeGenerator b/src/main/resources/META-INF/services/liquibase.diff.output.changelog.ChangeGenerator index c50ff91c..6e93951a 100644 --- a/src/main/resources/META-INF/services/liquibase.diff.output.changelog.ChangeGenerator +++ b/src/main/resources/META-INF/services/liquibase.diff.output.changelog.ChangeGenerator @@ -1 +1,2 @@ +liquibase.ext.databricks.diff.output.changelog.MissingTableChangeGeneratorDatabricks liquibase.ext.databricks.diff.output.changelog.MissingViewChangeGeneratorDatabricks From be58985264468421bc608ffa20f26dc02839fb08 Mon Sep 17 00:00:00 2001 From: KushnirykOleh Date: Thu, 10 Oct 2024 15:31:31 +0300 Subject: [PATCH 05/11] addressed PR comment --- .../databricks/change/createTable/ExtendedTableProperties.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/liquibase/ext/databricks/change/createTable/ExtendedTableProperties.java b/src/main/java/liquibase/ext/databricks/change/createTable/ExtendedTableProperties.java index cf826f25..4dddfc5b 100644 --- a/src/main/java/liquibase/ext/databricks/change/createTable/ExtendedTableProperties.java +++ b/src/main/java/liquibase/ext/databricks/change/createTable/ExtendedTableProperties.java @@ -1,5 +1,6 @@ package liquibase.ext.databricks.change.createTable; +import liquibase.ext.databricks.parser.NamespaceDetailsDatabricks; import liquibase.serializer.AbstractLiquibaseSerializable; import lombok.AllArgsConstructor; import lombok.Getter; @@ -21,6 +22,6 @@ public String getSerializedObjectName() { @Override public String getSerializedObjectNamespace() { - return "http://www.liquibase.org/xml/ns/databricks"; + return NamespaceDetailsDatabricks.DATABRICKS_NAMESPACE; } } From 392c4176d223f3c72159841837f4020cd3c902d9 Mon Sep 17 00:00:00 2001 From: KushnirykOleh Date: Fri, 11 Oct 2024 12:19:56 +0300 Subject: [PATCH 06/11] added tblProperties to expectedJson --- .../databricks/createTable.json | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 src/test/resources/liquibase/harness/change/expectedSnapshot/databricks/createTable.json diff --git a/src/test/resources/liquibase/harness/change/expectedSnapshot/databricks/createTable.json b/src/test/resources/liquibase/harness/change/expectedSnapshot/databricks/createTable.json new file mode 100644 index 00000000..ff386c5c --- /dev/null +++ b/src/test/resources/liquibase/harness/change/expectedSnapshot/databricks/createTable.json @@ -0,0 +1,27 @@ +{ + "snapshot": { + "objects": { + "liquibase.structure.core.Table": [ + { + "table": { + "name": "test_table", + "tblProperties" : "delta.checkpoint.writeStatsAsJson=false,delta.checkpoint.writeStatsAsStruct=true,delta.columnMapping.maxColumnId=2,delta.columnMapping.mode=name,delta.enableDeletionVectors=true,delta.feature.allowColumnDefaults=supported,delta.feature.columnMapping=supported,delta.feature.deletionVectors=supported,delta.feature.invariants=supported,delta.minReaderVersion=3,delta.minWriterVersion=7" + } + }, + { + "table": { + "name": "test_table_properties", + "tblProperties" : "delta.checkpoint.writeStatsAsJson=false,delta.checkpoint.writeStatsAsStruct=true,delta.enableDeletionVectors=true,delta.feature.deletionVectors=supported,delta.feature.invariants=supported,delta.minReaderVersion=3,delta.minWriterVersion=7,external.location=s3://mybucket/mytable,this.is.my.key=12,this.is.my.key2=true" + } + } + ], + "liquibase.structure.core.Column": [ + { + "column": { + "name": "test_id" + } + } + ] + } + } +} \ No newline at end of file From 31efa0e73cb869a9a05f25c2d3f562ad71c90020 Mon Sep 17 00:00:00 2001 From: KushnirykOleh Date: Fri, 11 Oct 2024 12:28:35 +0300 Subject: [PATCH 07/11] resolved Sonar issues --- .../changelog/MissingTableChangeGeneratorDatabricks.java | 1 + .../snapshot/jvm/TableSnapshotGeneratorDatabricks.java | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java b/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java index 519f8ccc..b123a70e 100644 --- a/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java +++ b/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java @@ -56,6 +56,7 @@ private CreateTableChangeDatabricks getCreateTableChangeDatabricks(ExtendedTable return createTableChangeDatabricks; } + @Override protected CreateTableChange createCreateTableChange() { return new CreateTableChangeDatabricks(); } diff --git a/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java b/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java index b6ae9002..cccaa1ca 100644 --- a/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java +++ b/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java @@ -16,10 +16,10 @@ public class TableSnapshotGeneratorDatabricks extends TableSnapshotGenerator { - private final static String LOCATION = "Location"; - private final static String TABLE_PROPERTIES = "Table Properties"; - private final static String TBL_PROPERTIES = "tblProperties"; - private final static String DETAILED_TABLE_INFORMATION_NODE = "# Detailed Table Information"; + private static final String LOCATION = "Location"; + private static final String TABLE_PROPERTIES = "Table Properties"; + private static final String TBL_PROPERTIES = "tblProperties"; + private static final String DETAILED_TABLE_INFORMATION_NODE = "# Detailed Table Information"; @Override public int getPriority(Class objectType, Database database) { From 32bd731fa75b80b919dc53d58ceafef0a2541869 Mon Sep 17 00:00:00 2001 From: Mykhailo Savchenko Date: Tue, 15 Oct 2024 12:15:07 +0300 Subject: [PATCH 08/11] DAT-18792: added cluster by support for snapshot and diff table related change types --- ...MissingTableChangeGeneratorDatabricks.java | 53 ++++++++++++++++++- .../CreateTableGeneratorDatabricks.java | 23 +++++++- .../databricks/createClusteredTableNew.json | 13 ++++- .../databricks/createClusteredTableNew.xml | 4 +- .../databricks/createClusteredTableNew.yaml | 7 ++- .../databricks/createClusteredTableNew.json | 23 ++++++++ .../databricks/createClusteredTableNew.sql | 4 +- 7 files changed, 120 insertions(+), 7 deletions(-) diff --git a/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java b/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java index b123a70e..b8ca99e5 100644 --- a/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java +++ b/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java @@ -12,8 +12,12 @@ import liquibase.structure.DatabaseObject; import liquibase.structure.core.Table; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + public class MissingTableChangeGeneratorDatabricks extends MissingTableChangeGenerator { + private static final String CLUSTERING_INFORMATION_TBL_PROPERTY_START = "clusteringColumns=[["; @Override public int getPriority(Class objectType, Database database) { @@ -51,6 +55,13 @@ private CreateTableChangeDatabricks getCreateTableChangeDatabricks(ExtendedTable createTableChangeDatabricks.setRemarks(temp.getRemarks()); createTableChangeDatabricks.setIfNotExists(temp.getIfNotExists()); createTableChangeDatabricks.setRowDependencies(temp.getRowDependencies()); + if(extendedTableProperties.getTblProperties().contains(CLUSTERING_INFORMATION_TBL_PROPERTY_START)) { + //This approach should be rewritten after implementing tblProperties map parsing to Map struct collection + String tblProperties = extendedTableProperties.getTblProperties(); + createTableChangeDatabricks.setClusterColumns(parseClusterColumns(tblProperties)); + //Remove clusteringColumns property from tblProperties + extendedTableProperties.setTblProperties(tblProperties.replace(getClusteringColumnsSubstring(tblProperties), "")); + } createTableChangeDatabricks.setExtendedTableProperties(extendedTableProperties); return createTableChangeDatabricks; @@ -60,4 +71,44 @@ private CreateTableChangeDatabricks getCreateTableChangeDatabricks(ExtendedTable protected CreateTableChange createCreateTableChange() { return new CreateTableChangeDatabricks(); } -} + + /** + * There might be 2 edge cases with table properties map: + *
  • User specified a custom tblProperty that appears prior databricks internal managed + * clusteringColumns property and this custom tblProperty contains string snippet 'clusteringColumns=[['. + * Pattern and matcher would process properties incorrect if there was present structure ["anyString"] + * in between string snippet 'clusteringColumns=[[' and actual databricks managed property + * clusteringColumns. + *
  • User specified a custom table property that contains string snippet 'clusteringColumns=' and there are + * no clustering configured on the table.
      + * @param tblProperties + * The tblProperties map in a raw string format that returns as part of result set of + * DESCRIBE TABLE EXTENDED query. + * @return Coma separated clustering columns extracted from tblProperties + * */ + private String parseClusterColumns(String tblProperties) { + // Actual pattern - "\[\"([^\"]*)\"\]" + Pattern pattern = Pattern.compile("\\[\\\"([^\\\"]*)\\\"\\]"); + String clusteringColumnsRaw = getClusteringColumnsSubstring(tblProperties); + StringBuilder clusterColumnNames = new StringBuilder(); + try{ + Matcher matcher = pattern.matcher(clusteringColumnsRaw); + for (int i = 0; matcher.find(); i++) { + //getting first matching group to avoid quotes and brackets + String group = matcher.group(1); + clusterColumnNames.append(i == 0 ? "" : ",").append(group); + } + } catch (IllegalStateException e) { + e.printStackTrace(); + } + + return clusterColumnNames.toString(); + } + + private String getClusteringColumnsSubstring(String tblProperties) { + int clusterColumnsStartIndex = tblProperties.indexOf(CLUSTERING_INFORMATION_TBL_PROPERTY_START); + // To avoid appearance anywhere before clusteringColumns property we are specifying clusterColumnsStartIndex + // to start search from for end index snippet. + return tblProperties.substring(clusterColumnsStartIndex, tblProperties.indexOf("\"]],", clusterColumnsStartIndex) + 4); + } +} \ No newline at end of file diff --git a/src/main/java/liquibase/ext/databricks/sqlgenerator/CreateTableGeneratorDatabricks.java b/src/main/java/liquibase/ext/databricks/sqlgenerator/CreateTableGeneratorDatabricks.java index 01998ece..6ed2dcdc 100644 --- a/src/main/java/liquibase/ext/databricks/sqlgenerator/CreateTableGeneratorDatabricks.java +++ b/src/main/java/liquibase/ext/databricks/sqlgenerator/CreateTableGeneratorDatabricks.java @@ -17,6 +17,8 @@ public class CreateTableGeneratorDatabricks extends CreateTableGenerator { + private static final String CLUSTERING_INFORMATION_TBL_PROPERTY_START = "clusteringColumns=[["; + @Override public int getPriority() { @@ -49,7 +51,7 @@ public Sql[] generateSql(CreateTableStatement statement, Database database, SqlG if ((!StringUtils.isEmpty(thisStatement.getTableFormat()))) { finalsql.append(" USING ").append(thisStatement.getTableFormat()); } else if (thisStatement.getExtendedTableProperties() != null && StringUtils.isNoneEmpty(thisStatement.getExtendedTableProperties().getTblProperties())) { - finalsql.append(" TBLPROPERTIES (").append(thisStatement.getExtendedTableProperties().getTblProperties()).append(")"); + finalsql.append(" TBLPROPERTIES (").append(avoidClusterProperties(thisStatement)).append(")"); } else { finalsql.append(" USING delta TBLPROPERTIES('delta.feature.allowColumnDefaults' = 'supported', 'delta.columnMapping.mode' = 'name', 'delta.enableDeletionVectors' = true)"); } @@ -109,4 +111,23 @@ public Sql[] generateSql(CreateTableStatement statement, Database database, SqlG } + /** + * While we are passing TBLPROPERTIES as raw string into create table statement, especially in cases of + * changelog generation we need to sanitize them from 'clusteringColumns' property, otherwise generated changelog + * will fail to execute. + * Parsing of tblProperties map as an actual Map structured collection should make this approach safer and easier. + * @param statement CreateTableStatementDatabricks containing tblProperties raw string + * @return tblProperties string without 'clusteringColumns' property if it was present, otherwise untouched + * tblProperties raw string. + * */ + private String avoidClusterProperties(CreateTableStatementDatabricks statement) { + String tblProperties = statement.getExtendedTableProperties().getTblProperties(); + if(tblProperties.contains(CLUSTERING_INFORMATION_TBL_PROPERTY_START)) { + int clusterColumnsStartIndex = tblProperties.indexOf(CLUSTERING_INFORMATION_TBL_PROPERTY_START); + String replaceString = tblProperties.substring(clusterColumnsStartIndex, tblProperties.indexOf("\"]],", clusterColumnsStartIndex) + 4); + return tblProperties.replace(replaceString, ""); + } + return tblProperties; + } + } diff --git a/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.json b/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.json index 87d853ee..6b822b60 100644 --- a/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.json +++ b/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.json @@ -20,9 +20,15 @@ "name": "test_new", "type": "int" } + }, + { + "column": { + "name": "test_present_new", + "type": "int" + } } ], - "clusterColumns": "test_id,test_new" + "clusterColumns": "test_id,test_new,test_present_new" } } ], @@ -48,6 +54,11 @@ "column": { "name": "test_id" } + }, + { + "column": { + "name": "test_present_new`" + } } ] } diff --git a/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.xml b/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.xml index 69d0008c..d5eb617f 100644 --- a/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.xml +++ b/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.xml @@ -13,7 +13,8 @@ - test_id,test_new + + test_id,test_new,test_present_new @@ -23,6 +24,7 @@ + diff --git a/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.yaml b/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.yaml index 714e4493..f719fbc4 100644 --- a/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.yaml +++ b/src/test/resources/liquibase/harness/change/changelogs/databricks/createClusteredTableNew.yaml @@ -12,7 +12,10 @@ databaseChangeLog: - column: name: test_new type: int - clusterColumns: test_id, test_new + - column: + name: test_present_new + type: int + clusterColumns: test_id, test_new, test_present_new rollback: dropTable: tableName: test_table_clustered_new @@ -25,6 +28,8 @@ databaseChangeLog: columns: - column: name: test_id + - column: + name: test_present_new rollback: empty - changeSet: diff --git a/src/test/resources/liquibase/harness/change/expectedSnapshot/databricks/createClusteredTableNew.json b/src/test/resources/liquibase/harness/change/expectedSnapshot/databricks/createClusteredTableNew.json index 7a73a41b..fecd6ded 100644 --- a/src/test/resources/liquibase/harness/change/expectedSnapshot/databricks/createClusteredTableNew.json +++ b/src/test/resources/liquibase/harness/change/expectedSnapshot/databricks/createClusteredTableNew.json @@ -1,2 +1,25 @@ { + "snapshot": { + "objects": { + "liquibase.structure.core.Table": [ + { + "table": { + "name": "test_table_clustered_new" + } + } + ], + "liquibase.structure.core.Column": [ + { + "column": { + "name": "test_id" + } + }, + { + "column": { + "name": "test_present_new" + } + } + ] + } + } } \ No newline at end of file diff --git a/src/test/resources/liquibase/harness/change/expectedSql/databricks/createClusteredTableNew.sql b/src/test/resources/liquibase/harness/change/expectedSql/databricks/createClusteredTableNew.sql index 898576e5..c1737f53 100644 --- a/src/test/resources/liquibase/harness/change/expectedSql/databricks/createClusteredTableNew.sql +++ b/src/test/resources/liquibase/harness/change/expectedSql/databricks/createClusteredTableNew.sql @@ -1,3 +1,3 @@ -CREATE TABLE main.liquibase_harness_test_ds.test_table_clustered_new (test_id INT, test_new INT) USING delta TBLPROPERTIES('delta.feature.allowColumnDefaults' = 'supported', 'delta.columnMapping.mode' = 'name', 'delta.enableDeletionVectors' = true) CLUSTER BY (test_id, test_new) -ALTER TABLE main.liquibase_harness_test_ds.test_table_clustered_new CLUSTER BY (test_id) +CREATE TABLE main.liquibase_harness_test_ds.test_table_clustered_new (test_id INT, test_new INT, test_present_new INT) USING delta TBLPROPERTIES('delta.feature.allowColumnDefaults' = 'supported', 'delta.columnMapping.mode' = 'name', 'delta.enableDeletionVectors' = true) CLUSTER BY (test_id, test_new, test_present_new) +ALTER TABLE main.liquibase_harness_test_ds.test_table_clustered_new CLUSTER BY (test_id,test_present_new) ALTER TABLE main.liquibase_harness_test_ds.test_table_clustered_new DROP COLUMN test_new \ No newline at end of file From fe3dd5c7b9992489ebecec1b944e0f5e3384aa09 Mon Sep 17 00:00:00 2001 From: KushnirykOleh Date: Tue, 15 Oct 2024 15:15:48 +0300 Subject: [PATCH 09/11] fixed exception when DBCL is missing --- .../jvm/TableSnapshotGeneratorDatabricks.java | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java b/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java index cccaa1ca..d47260c1 100644 --- a/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java +++ b/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java @@ -33,28 +33,29 @@ public int getPriority(Class objectType, Database data protected DatabaseObject snapshotObject(DatabaseObject example, DatabaseSnapshot snapshot) throws DatabaseException { Table table = (Table) super.snapshotObject(example, snapshot); Database database = snapshot.getDatabase(); - - String query = String.format("DESCRIBE TABLE EXTENDED %s.%s.%s;", database.getDefaultCatalogName(), database.getDefaultSchemaName(), example.getName()); - List> tablePropertiesResponse = Scope.getCurrentScope().getSingleton(ExecutorService.class) - .getExecutor("jdbc", database).queryForList(new RawParameterizedSqlStatement(query)); - // DESCRIBE TABLE EXTENDED returns both columns and additional information. - // We need to make sure "Location" is not column in the table, but table location in s3 - boolean detailedInformationNode = false; - for (Map tableProperty : tablePropertiesResponse) { - if (tableProperty.get("COL_NAME").equals(DETAILED_TABLE_INFORMATION_NODE)) { - detailedInformationNode = true; - continue; - } - if (detailedInformationNode && tableProperty.get("COL_NAME").equals(LOCATION)) { - table.setAttribute(LOCATION, tableProperty.get("DATA_TYPE")); - } - if (detailedInformationNode && tableProperty.get("COL_NAME").equals(TABLE_PROPERTIES)) { - String tblProperties = (String) tableProperty.get("DATA_TYPE"); - table.setAttribute(TBL_PROPERTIES, tblProperties.substring(1, tblProperties.length() - 1));// remove starting and ending square brackets + if (table != null) { + String query = String.format("DESCRIBE TABLE EXTENDED %s.%s.%s;", database.getDefaultCatalogName(), database.getDefaultSchemaName(), + example.getName()); + List> tablePropertiesResponse = Scope.getCurrentScope().getSingleton(ExecutorService.class) + .getExecutor("jdbc", database).queryForList(new RawParameterizedSqlStatement(query)); + // DESCRIBE TABLE EXTENDED returns both columns and additional information. + // We need to make sure "Location" is not column in the table, but table location in s3 + boolean detailedInformationNode = false; + for (Map tableProperty : tablePropertiesResponse) { + if (tableProperty.get("COL_NAME").equals(DETAILED_TABLE_INFORMATION_NODE)) { + detailedInformationNode = true; + continue; + } + if (detailedInformationNode && tableProperty.get("COL_NAME").equals(LOCATION)) { + table.setAttribute(LOCATION, tableProperty.get("DATA_TYPE")); + } + if (detailedInformationNode && tableProperty.get("COL_NAME").equals(TABLE_PROPERTIES)) { + String tblProperties = (String) tableProperty.get("DATA_TYPE"); + table.setAttribute(TBL_PROPERTIES, tblProperties.substring(1, tblProperties.length() - 1));// remove starting and ending square brackets + } } } return table; - } } \ No newline at end of file From eb0557d896aaf3ba921a735b1b855ee5de1999e6 Mon Sep 17 00:00:00 2001 From: Mykhailo Savchenko Date: Wed, 16 Oct 2024 14:12:29 +0300 Subject: [PATCH 10/11] DAT-18792: added extra DB call to fetch TBLPROPERIES map in key + value format. --- ...MissingTableChangeGeneratorDatabricks.java | 58 +++---------------- .../jvm/TableSnapshotGeneratorDatabricks.java | 31 ++++++++-- 2 files changed, 35 insertions(+), 54 deletions(-) diff --git a/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java b/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java index b8ca99e5..b6662b5e 100644 --- a/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java +++ b/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java @@ -12,13 +12,10 @@ import liquibase.structure.DatabaseObject; import liquibase.structure.core.Table; -import java.util.regex.Matcher; import java.util.regex.Pattern; public class MissingTableChangeGeneratorDatabricks extends MissingTableChangeGenerator { - private static final String CLUSTERING_INFORMATION_TBL_PROPERTY_START = "clusteringColumns=[["; - @Override public int getPriority(Class objectType, Database database) { if (database instanceof DatabricksDatabase && Table.class.isAssignableFrom(objectType)) { @@ -38,12 +35,14 @@ public Change[] fixMissing(DatabaseObject missingObject, DiffOutputControl contr ExtendedTableProperties extendedTableProperties = new ExtendedTableProperties( missingObject.getAttribute("Location", String.class), missingObject.getAttribute("tblProperties", String.class)); + String clusterColumns = missingObject.getAttribute("clusterColumns", ""); - changes[0] = getCreateTableChangeDatabricks(extendedTableProperties, changes); + changes[0] = getCreateTableChangeDatabricks(extendedTableProperties, changes, clusterColumns); return changes; } - private CreateTableChangeDatabricks getCreateTableChangeDatabricks(ExtendedTableProperties extendedTableProperties, Change[] changes) { + private CreateTableChangeDatabricks getCreateTableChangeDatabricks(ExtendedTableProperties extendedTableProperties, + Change[] changes, String clusterColumns) { CreateTableChange temp = (CreateTableChange) changes[0]; CreateTableChangeDatabricks createTableChangeDatabricks = new CreateTableChangeDatabricks(); createTableChangeDatabricks.setColumns(temp.getColumns()); @@ -55,12 +54,8 @@ private CreateTableChangeDatabricks getCreateTableChangeDatabricks(ExtendedTable createTableChangeDatabricks.setRemarks(temp.getRemarks()); createTableChangeDatabricks.setIfNotExists(temp.getIfNotExists()); createTableChangeDatabricks.setRowDependencies(temp.getRowDependencies()); - if(extendedTableProperties.getTblProperties().contains(CLUSTERING_INFORMATION_TBL_PROPERTY_START)) { - //This approach should be rewritten after implementing tblProperties map parsing to Map struct collection - String tblProperties = extendedTableProperties.getTblProperties(); - createTableChangeDatabricks.setClusterColumns(parseClusterColumns(tblProperties)); - //Remove clusteringColumns property from tblProperties - extendedTableProperties.setTblProperties(tblProperties.replace(getClusteringColumnsSubstring(tblProperties), "")); + if (!clusterColumns.isEmpty()) { + createTableChangeDatabricks.setClusterColumns(sanitizeClusterColumns(clusterColumns)); } createTableChangeDatabricks.setExtendedTableProperties(extendedTableProperties); @@ -72,43 +67,8 @@ protected CreateTableChange createCreateTableChange() { return new CreateTableChangeDatabricks(); } - /** - * There might be 2 edge cases with table properties map: - *
      • User specified a custom tblProperty that appears prior databricks internal managed - * clusteringColumns property and this custom tblProperty contains string snippet 'clusteringColumns=[['. - * Pattern and matcher would process properties incorrect if there was present structure ["anyString"] - * in between string snippet 'clusteringColumns=[[' and actual databricks managed property - * clusteringColumns. - *
      • User specified a custom table property that contains string snippet 'clusteringColumns=' and there are - * no clustering configured on the table.
          - * @param tblProperties - * The tblProperties map in a raw string format that returns as part of result set of - * DESCRIBE TABLE EXTENDED query. - * @return Coma separated clustering columns extracted from tblProperties - * */ - private String parseClusterColumns(String tblProperties) { - // Actual pattern - "\[\"([^\"]*)\"\]" - Pattern pattern = Pattern.compile("\\[\\\"([^\\\"]*)\\\"\\]"); - String clusteringColumnsRaw = getClusteringColumnsSubstring(tblProperties); - StringBuilder clusterColumnNames = new StringBuilder(); - try{ - Matcher matcher = pattern.matcher(clusteringColumnsRaw); - for (int i = 0; matcher.find(); i++) { - //getting first matching group to avoid quotes and brackets - String group = matcher.group(1); - clusterColumnNames.append(i == 0 ? "" : ",").append(group); - } - } catch (IllegalStateException e) { - e.printStackTrace(); - } - - return clusterColumnNames.toString(); - } - - private String getClusteringColumnsSubstring(String tblProperties) { - int clusterColumnsStartIndex = tblProperties.indexOf(CLUSTERING_INFORMATION_TBL_PROPERTY_START); - // To avoid appearance anywhere before clusteringColumns property we are specifying clusterColumnsStartIndex - // to start search from for end index snippet. - return tblProperties.substring(clusterColumnsStartIndex, tblProperties.indexOf("\"]],", clusterColumnsStartIndex) + 4); + private String sanitizeClusterColumns(String clusterColumnProperty) { + Pattern pattern = Pattern.compile("\\[|\\]|\\\""); + return clusterColumnProperty.replaceAll(pattern.toString(), ""); } } \ No newline at end of file diff --git a/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java b/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java index d47260c1..814ffdc3 100644 --- a/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java +++ b/src/main/java/liquibase/ext/databricks/snapshot/jvm/TableSnapshotGeneratorDatabricks.java @@ -11,14 +11,16 @@ import liquibase.structure.DatabaseObject; import liquibase.structure.core.Table; +import java.util.Comparator; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; public class TableSnapshotGeneratorDatabricks extends TableSnapshotGenerator { private static final String LOCATION = "Location"; - private static final String TABLE_PROPERTIES = "Table Properties"; private static final String TBL_PROPERTIES = "tblProperties"; + private static final String CLUSTER_COLUMNS = "clusterColumns"; private static final String DETAILED_TABLE_INFORMATION_NODE = "# Detailed Table Information"; @Override @@ -49,13 +51,32 @@ protected DatabaseObject snapshotObject(DatabaseObject example, DatabaseSnapshot if (detailedInformationNode && tableProperty.get("COL_NAME").equals(LOCATION)) { table.setAttribute(LOCATION, tableProperty.get("DATA_TYPE")); } - if (detailedInformationNode && tableProperty.get("COL_NAME").equals(TABLE_PROPERTIES)) { - String tblProperties = (String) tableProperty.get("DATA_TYPE"); - table.setAttribute(TBL_PROPERTIES, tblProperties.substring(1, tblProperties.length() - 1));// remove starting and ending square brackets - } } + Map tblProperties = getTblPropertiesMap(database, example.getName()); + if (tblProperties.containsKey(CLUSTER_COLUMNS)) { + // used remove, as clusterColumns tblProperty is not allowed in create/alter table statements + table.setAttribute(CLUSTER_COLUMNS, tblProperties.remove(CLUSTER_COLUMNS)); + } + table.setAttribute(TBL_PROPERTIES, getTblPropertiesString(tblProperties)); } return table; } + private Map getTblPropertiesMap(Database database, String table) throws DatabaseException { + String query = String.format("SHOW TBLPROPERTIES %s.%s.%s;", database.getDefaultCatalogName(), database.getDefaultSchemaName(), table); + List> tablePropertiesResponse = Scope.getCurrentScope().getSingleton(ExecutorService.class) + .getExecutor("jdbc", database).queryForList(new RawParameterizedSqlStatement(query)); + return tablePropertiesResponse.stream() + .collect(Collectors.toMap(mapElement -> (String) mapElement.get("KEY"), mapElement -> (String) mapElement.get("VALUE"))); + } + + private String getTblPropertiesString(Map propertiesMap) { + StringBuilder csvString = new StringBuilder(); + propertiesMap.entrySet() + .stream() + .sorted(Map.Entry.comparingByKey()) + .forEach(entry -> csvString.append(entry.getKey()).append("=").append(entry.getValue()).append(",")); + return csvString.toString().replaceAll(",$", ""); + } + } \ No newline at end of file From 6cb528a926c50e645e75474f41227f512c09048b Mon Sep 17 00:00:00 2001 From: Mykhailo Savchenko Date: Wed, 16 Oct 2024 14:24:49 +0300 Subject: [PATCH 11/11] DAT-18792: sonar lint issue fix - updated replace patern from single characters enumeration to character class. --- .../output/changelog/MissingTableChangeGeneratorDatabricks.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java b/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java index b6662b5e..2a821fb9 100644 --- a/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java +++ b/src/main/java/liquibase/ext/databricks/diff/output/changelog/MissingTableChangeGeneratorDatabricks.java @@ -68,7 +68,7 @@ protected CreateTableChange createCreateTableChange() { } private String sanitizeClusterColumns(String clusterColumnProperty) { - Pattern pattern = Pattern.compile("\\[|\\]|\\\""); + Pattern pattern = Pattern.compile("[\\[\\]\\\"]"); return clusterColumnProperty.replaceAll(pattern.toString(), ""); } } \ No newline at end of file