Skip to content

Commit

Permalink
DAT-18792: added extra DB call to fetch TBLPROPERTIES map in key + val…
Browse files Browse the repository at this point in the history
…ue format.
  • Loading branch information
Mykhailo Savchenko committed Oct 16, 2024
1 parent a4747f1 commit eb0557d
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 54 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,10 @@
import liquibase.structure.DatabaseObject;
import liquibase.structure.core.Table;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class MissingTableChangeGeneratorDatabricks extends MissingTableChangeGenerator {

private static final String CLUSTERING_INFORMATION_TBL_PROPERTY_START = "clusteringColumns=[[";

@Override
public int getPriority(Class<? extends DatabaseObject> objectType, Database database) {
if (database instanceof DatabricksDatabase && Table.class.isAssignableFrom(objectType)) {
Expand All @@ -38,12 +35,14 @@ public Change[] fixMissing(DatabaseObject missingObject, DiffOutputControl contr
ExtendedTableProperties extendedTableProperties = new ExtendedTableProperties(
missingObject.getAttribute("Location", String.class),
missingObject.getAttribute("tblProperties", String.class));
String clusterColumns = missingObject.getAttribute("clusterColumns", "");

changes[0] = getCreateTableChangeDatabricks(extendedTableProperties, changes);
changes[0] = getCreateTableChangeDatabricks(extendedTableProperties, changes, clusterColumns);
return changes;
}

private CreateTableChangeDatabricks getCreateTableChangeDatabricks(ExtendedTableProperties extendedTableProperties, Change[] changes) {
private CreateTableChangeDatabricks getCreateTableChangeDatabricks(ExtendedTableProperties extendedTableProperties,
Change[] changes, String clusterColumns) {
CreateTableChange temp = (CreateTableChange) changes[0];
CreateTableChangeDatabricks createTableChangeDatabricks = new CreateTableChangeDatabricks();
createTableChangeDatabricks.setColumns(temp.getColumns());
Expand All @@ -55,12 +54,8 @@ private CreateTableChangeDatabricks getCreateTableChangeDatabricks(ExtendedTable
createTableChangeDatabricks.setRemarks(temp.getRemarks());
createTableChangeDatabricks.setIfNotExists(temp.getIfNotExists());
createTableChangeDatabricks.setRowDependencies(temp.getRowDependencies());
if(extendedTableProperties.getTblProperties().contains(CLUSTERING_INFORMATION_TBL_PROPERTY_START)) {
//This approach should be rewritten after implementing tblProperties map parsing to Map struct collection
String tblProperties = extendedTableProperties.getTblProperties();
createTableChangeDatabricks.setClusterColumns(parseClusterColumns(tblProperties));
//Remove clusteringColumns property from tblProperties
extendedTableProperties.setTblProperties(tblProperties.replace(getClusteringColumnsSubstring(tblProperties), ""));
if (!clusterColumns.isEmpty()) {
createTableChangeDatabricks.setClusterColumns(sanitizeClusterColumns(clusterColumns));
}

createTableChangeDatabricks.setExtendedTableProperties(extendedTableProperties);
Expand All @@ -72,43 +67,8 @@ protected CreateTableChange createCreateTableChange() {
return new CreateTableChangeDatabricks();
}

/** Matches one {@code ["name"]} element; capture group 1 is the name without brackets and quotes. */
private static final Pattern CLUSTER_COLUMN_ELEMENT = Pattern.compile("\\[\\\"([^\\\"]*)\\\"\\]");

/**
 * Extracts the clustering column names from the raw tblProperties string.
 * <p>
 * Known edge cases of this string-based approach:
 * <ul>
 * <li>A user-defined table property that appears before the Databricks-managed
 * {@code clusteringColumns} property and contains the snippet {@code clusteringColumns=[[}.
 * Extraction then starts at the custom property, so any {@code ["anyString"]} structure located
 * between that snippet and the real {@code clusteringColumns} property is reported as a column.</li>
 * <li>A user-defined table property that contains the snippet {@code clusteringColumns=} while
 * no clustering is configured on the table.</li>
 * </ul>
 *
 * @param tblProperties
 *         the tblProperties map in the raw string format returned as part of the result set of
 *         the DESCRIBE TABLE EXTENDED query
 * @return comma-separated clustering column names extracted from tblProperties
 */
private String parseClusterColumns(String tblProperties) {
    Matcher matcher = CLUSTER_COLUMN_ELEMENT.matcher(getClusteringColumnsSubstring(tblProperties));
    StringBuilder clusterColumnNames = new StringBuilder();
    // The separator is empty for the first match only, so an empty first name still yields a leading comma
    // before the second one, exactly as an index-based check would.
    String separator = "";
    while (matcher.find()) {
        // group(1) skips the surrounding brackets and quotes; it cannot fail after a successful find()
        clusterColumnNames.append(separator).append(matcher.group(1));
        separator = ",";
    }
    return clusterColumnNames.toString();
}

private String getClusteringColumnsSubstring(String tblProperties) {
int clusterColumnsStartIndex = tblProperties.indexOf(CLUSTERING_INFORMATION_TBL_PROPERTY_START);
// To avoid appearance anywhere before clusteringColumns property we are specifying clusterColumnsStartIndex
// to start search from for end index snippet.
return tblProperties.substring(clusterColumnsStartIndex, tblProperties.indexOf("\"]],", clusterColumnsStartIndex) + 4);
/** Matches every square bracket and double quote character. */
private static final Pattern CLUSTER_COLUMNS_WRAPPING = Pattern.compile("\\[|\\]|\\\"");

/**
 * Removes all {@code [}, {@code ]} and {@code "} characters from the clusterColumns property value.
 * <p>
 * NOTE(review): the value presumably looks like {@code [["col1"],["col2"]]}, which would leave
 * {@code col1,col2} — confirm against the actual SHOW TBLPROPERTIES output.
 *
 * @param clusterColumnProperty raw clusterColumns property value; must not be null
 * @return the input with every square bracket and double quote removed
 */
private String sanitizeClusterColumns(String clusterColumnProperty) {
    // Reuse the precompiled pattern instead of compiling it, stringifying it and compiling it again.
    return CLUSTER_COLUMNS_WRAPPING.matcher(clusterColumnProperty).replaceAll("");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,16 @@
import liquibase.structure.DatabaseObject;
import liquibase.structure.core.Table;

import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class TableSnapshotGeneratorDatabricks extends TableSnapshotGenerator {

private static final String LOCATION = "Location";
private static final String TABLE_PROPERTIES = "Table Properties";
private static final String TBL_PROPERTIES = "tblProperties";
private static final String CLUSTER_COLUMNS = "clusterColumns";
private static final String DETAILED_TABLE_INFORMATION_NODE = "# Detailed Table Information";

@Override
Expand Down Expand Up @@ -49,13 +51,32 @@ protected DatabaseObject snapshotObject(DatabaseObject example, DatabaseSnapshot
if (detailedInformationNode && tableProperty.get("COL_NAME").equals(LOCATION)) {
table.setAttribute(LOCATION, tableProperty.get("DATA_TYPE"));
}
if (detailedInformationNode && tableProperty.get("COL_NAME").equals(TABLE_PROPERTIES)) {
String tblProperties = (String) tableProperty.get("DATA_TYPE");
table.setAttribute(TBL_PROPERTIES, tblProperties.substring(1, tblProperties.length() - 1));// remove starting and ending square brackets
}
}
Map<String, String> tblProperties = getTblPropertiesMap(database, example.getName());
if (tblProperties.containsKey(CLUSTER_COLUMNS)) {
// used remove, as clusterColumns tblProperty is not allowed in create/alter table statements
table.setAttribute(CLUSTER_COLUMNS, tblProperties.remove(CLUSTER_COLUMNS));
}
table.setAttribute(TBL_PROPERTIES, getTblPropertiesString(tblProperties));
}
return table;
}

/**
 * Fetches the table properties of a table as a key/value map.
 * <p>
 * Runs {@code SHOW TBLPROPERTIES <catalog>.<schema>.<table>;} using the database's default catalog
 * and default schema, and reads the {@code KEY} and {@code VALUE} entries of every returned row.
 *
 * @param database database to query; its default catalog and schema names qualify the table
 * @param table table name, inserted into the statement as-is
 * @return a mutable map of property name to property value; if a key is returned more than once
 *         the last row wins
 * @throws DatabaseException propagated from the query execution
 */
private Map<String, String> getTblPropertiesMap(Database database, String table) throws DatabaseException {
    String query = String.format("SHOW TBLPROPERTIES %s.%s.%s;", database.getDefaultCatalogName(), database.getDefaultSchemaName(), table);
    List<Map<String, ?>> rows = Scope.getCurrentScope().getSingleton(ExecutorService.class)
            .getExecutor("jdbc", database).queryForList(new RawParameterizedSqlStatement(query));
    // Built explicitly rather than with Collectors.toMap: the caller removes entries from the result,
    // and toMap guarantees no mutability, rejects null values and throws on duplicate keys.
    Map<String, String> properties = new HashMap<>();
    for (Map<String, ?> row : rows) {
        properties.put((String) row.get("KEY"), (String) row.get("VALUE"));
    }
    return properties;
}

/**
 * Renders the properties as one comma-separated string of {@code key=value} pairs, ordered by key.
 *
 * @param propertiesMap properties to render; an empty map yields an empty string
 * @return the pairs joined with {@code ,} and no trailing separator
 */
private String getTblPropertiesString(Map<String, String> propertiesMap) {
    // joining(",") places separators only between pairs, so no trailing comma has to be stripped afterwards
    return propertiesMap.entrySet()
            .stream()
            .sorted(Map.Entry.comparingByKey())
            .map(property -> property.getKey() + "=" + property.getValue())
            .collect(Collectors.joining(","));
}

}

0 comments on commit eb0557d

Please sign in to comment.