From 32a2656562ae5cb3036b088fc3f7c64b6b8810d6 Mon Sep 17 00:00:00 2001 From: Mariko Medlock Date: Wed, 4 Sep 2024 15:05:26 -0400 Subject: [PATCH 01/15] Attribute isRepeated flag. ValidateDataTypes indexing job. --- .../job/bigquery/ValidateDataTypes.java | 12 ++++++++ .../terra/tanagra/underlay/ColumnSchema.java | 18 ++++++++++-- .../bio/terra/tanagra/underlay/Underlay.java | 1 + .../underlay/entitymodel/Attribute.java | 9 ++++++ .../underlay/indextable/ITEntityMain.java | 3 +- .../underlay/serialization/SZAttribute.java | 7 +++++ .../sourcetable/STEntityAttributes.java | 4 ++- .../datamapping/aouCT/entity/variant/all.sql | 10 +++++++ .../aouCT/entity/variant/entity.json | 17 +++++++++++ .../config/indexer/aouSC2023Q3R2_verily.json | 28 +++++++++++++++++++ .../underlay/aouSC2023Q3R2/underlay.json | 4 ++- .../bigquery/sqlbuilding/BQFieldTest.java | 2 ++ 12 files changed, 109 insertions(+), 6 deletions(-) create mode 100644 underlay/src/main/resources/config/datamapping/aouCT/entity/variant/all.sql create mode 100644 underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json create mode 100644 underlay/src/main/resources/config/indexer/aouSC2023Q3R2_verily.json diff --git a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/ValidateDataTypes.java b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/ValidateDataTypes.java index 4870f40a1..6b826e184 100644 --- a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/ValidateDataTypes.java +++ b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/ValidateDataTypes.java @@ -104,6 +104,18 @@ public void run(boolean isDryRun) { .collect(Collectors.joining(",", "[", "]")), sourceQueryField.getType()); } + + // Check that the schema repeated flags match those of the index table columns. + boolean sourceQueryFieldIsRepeated = sourceQueryField.getMode().equals(Field.Mode.REPEATED); + boolean isRepeatedFlagsMatch = attribute.isDataTypeRepeated() == sourceQueryFieldIsRepeated; + if (!isRepeatedFlagsMatch) { + foundError = true; + LOGGER.info( + "Data type repeated mismatch found for attribute {}: entity declared {}, SQL schema returns {}", + attribute.getName(), + attribute.isDataTypeRepeated(), + sourceQueryField.getMode()); + } } if (foundError) { throw new InvalidConfigException("Data type mismatch found for entity: " + entity.getName()); diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/ColumnSchema.java b/underlay/src/main/java/bio/terra/tanagra/underlay/ColumnSchema.java index 350cb9e22..c950fea02 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/ColumnSchema.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/ColumnSchema.java @@ -7,15 +7,22 @@ public class ColumnSchema implements Serializable { private final String columnName; private final DataType dataType; + private final boolean isDataTypeRepeated; private final boolean isRequired; public ColumnSchema(String columnName, DataType dataType) { - this(columnName, dataType, false); + this(columnName, dataType, false, false); } public ColumnSchema(String columnName, DataType dataType, boolean isRequired) { + this(columnName, dataType, false, isRequired); + } + + public ColumnSchema( + String columnName, DataType dataType, boolean isDataTypeRepeated, boolean isRequired) { this.columnName = columnName; this.dataType = dataType; + this.isDataTypeRepeated = isDataTypeRepeated; this.isRequired = isRequired; } @@ -27,6 +34,10 @@ public DataType getDataType() { return dataType; } + public boolean isDataTypeRepeated() { + return isDataTypeRepeated; + } + public boolean isRequired() { return isRequired; } @@ -42,11 +53,12 @@ public boolean equals(Object o) { ColumnSchema that = (ColumnSchema) o; return isRequired == that.isRequired && columnName.equals(that.columnName) - && dataType == that.dataType; + && dataType == that.dataType + && isDataTypeRepeated == that.isDataTypeRepeated; } @Override public int hashCode() { - return Objects.hash(columnName, dataType, isRequired); + return Objects.hash(columnName, dataType, isDataTypeRepeated, isRequired); } } diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/Underlay.java b/underlay/src/main/java/bio/terra/tanagra/underlay/Underlay.java index 3a64b4e26..f37a8f441 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/Underlay.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/Underlay.java @@ -372,6 +372,7 @@ public static Entity fromConfigEntity(SZEntity szEntity, String primaryEntityNam return new Attribute( szAttribute.name, ConfigReader.deserializeDataType(szAttribute.dataType), + szAttribute.isDataTypeRepeated, szAttribute.displayFieldName != null, szAttribute.name.equals(szEntity.idAttribute), szAttribute.runtimeSqlFunctionWrapper, diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/entitymodel/Attribute.java b/underlay/src/main/java/bio/terra/tanagra/underlay/entitymodel/Attribute.java index e5cf9ec15..f698fe80a 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/entitymodel/Attribute.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/entitymodel/Attribute.java @@ -6,6 +6,7 @@ public final class Attribute { private final String name; private final DataType dataType; + private final boolean isDataTypeRepeated; private final boolean isValueDisplay; private final boolean isId; private final String runtimeSqlFunctionWrapper; @@ -20,6 +21,7 @@ public final class Attribute { public Attribute( String name, DataType dataType, + boolean isDataTypeRepeated, boolean isValueDisplay, boolean isId, String runtimeSqlFunctionWrapper, @@ -31,6 +33,7 @@ public Attribute( SourceQuery sourceQuery) { this.name = name; this.dataType = dataType; + this.isDataTypeRepeated = isDataTypeRepeated; this.isValueDisplay = isValueDisplay; this.isId = isId; this.runtimeSqlFunctionWrapper = runtimeSqlFunctionWrapper; @@ -50,6 +53,10 @@ public DataType getDataType() { return dataType; } + public boolean isDataTypeRepeated() { + return isDataTypeRepeated; + } + public boolean isSimple() { return !isValueDisplay; } @@ -111,6 +118,7 @@ public boolean equals(Object o) { && isVisitIdForTemporalQuery == attribute.isVisitIdForTemporalQuery && name.equals(attribute.name) && dataType == attribute.dataType + && isDataTypeRepeated == attribute.isDataTypeRepeated && Objects.equals(runtimeSqlFunctionWrapper, attribute.runtimeSqlFunctionWrapper) && runtimeDataType == attribute.runtimeDataType && Objects.equals(sourceQuery, attribute.sourceQuery); @@ -121,6 +129,7 @@ public int hashCode() { return Objects.hash( name, dataType, + isDataTypeRepeated, isValueDisplay, isId, runtimeSqlFunctionWrapper, diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/indextable/ITEntityMain.java b/underlay/src/main/java/bio/terra/tanagra/underlay/indextable/ITEntityMain.java index 9572f4e9a..6d9c3b5a4 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/indextable/ITEntityMain.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/indextable/ITEntityMain.java @@ -107,7 +107,8 @@ public SqlField getAttributeValueField(String attribute) { } public ColumnSchema getAttributeValueColumnSchema(Attribute attribute) { - return new ColumnSchema(attribute.getName(), attribute.getDataType()); + return new ColumnSchema( + attribute.getName(), attribute.getDataType(), attribute.isDataTypeRepeated(), false); } public SqlField getAttributeDisplayField(String attribute) { diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZAttribute.java b/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZAttribute.java index 19aa404fa..54748d802 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZAttribute.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZAttribute.java @@ -23,6 +23,13 @@ public class SZAttribute { @AnnotatedField(name = "SZAttribute.dataType", markdown = "Data type of the attribute.") public SZDataType dataType; + @AnnotatedField( + name = "SZAttribute.isDataTypeRepeated", + markdown = "True if the data type is repeated (e.g. an array of ints).", + optional = true, + defaultValue = "false") + public boolean isDataTypeRepeated; + @AnnotatedField( name = "SZAttribute.valueFieldName", markdown = diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/sourcetable/STEntityAttributes.java b/underlay/src/main/java/bio/terra/tanagra/underlay/sourcetable/STEntityAttributes.java index 0a0635e74..5ea61742c 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/sourcetable/STEntityAttributes.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/sourcetable/STEntityAttributes.java @@ -32,7 +32,9 @@ public STEntityAttributes(BQTable bqTable, String entity, List szAt szAttribute.valueFieldName == null ? szAttribute.name : szAttribute.valueFieldName, - ConfigReader.deserializeDataType(szAttribute.dataType))); + ConfigReader.deserializeDataType(szAttribute.dataType), + szAttribute.isDataTypeRepeated, + false)); if (szAttribute.displayFieldName != null) { attributeDisplayColumnSchemasBuilder.put( szAttribute.name, new ColumnSchema(szAttribute.displayFieldName, DataType.STRING)); diff --git a/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/all.sql b/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/all.sql new file mode 100644 index 000000000..44729b870 --- /dev/null +++ b/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/all.sql @@ -0,0 +1,10 @@ +SELECT ROW_NUMBER() OVER (ORDER BY vid) AS row_num, + vid, + gene_symbol, + consequence, + aa_change, + clinvar_classification, + gvs_all_ac, + gvs_all_an, + gvs_all_af +FROM `${omopDataset}.prep_vat` diff --git a/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json b/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json new file mode 100644 index 000000000..6035eb383 --- /dev/null +++ b/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json @@ -0,0 +1,17 @@ +{ + "name": "variant", + "allInstancesSqlFile": "all.sql", + "attributes": [ + { "name": "id", "dataType": "INT64", "valueFieldName": "row_num" }, + { "name": "variant_id", "dataType": "STRING", "valueFieldName": "vid" }, + { "name": "gene", "dataType": "STRING", "valueFieldName": "gene_symbol", "isComputeDisplayHint": true }, + { "name": "consequence", "dataType": "STRING", "isDataTypeRepeated": true, "isComputeDisplayHint": true }, + { "name": "protein_change", "dataType": "STRING", "valueFieldName": "aa_change" }, + { "name": "clinvar_significance", "dataType": "STRING", "isDataTypeRepeated": true, "valueFieldName": "clinvar_classification", "isComputeDisplayHint": true }, + { "name": "allele_count", "dataType": "INT64", "valueFieldName": "gvs_all_ac", "isComputeDisplayHint": true }, + { "name": "allele_number", "dataType": "INT64", "valueFieldName": "gvs_all_an", "isComputeDisplayHint": true }, + { "name": "allele_frequency", "dataType": "DOUBLE", "valueFieldName": "gvs_all_af", "isComputeDisplayHint": true } + ], + "idAttribute": "id", + "optimizeGroupByAttributes": [ "gender", "race", "age", "ethnicity" ] +} diff --git a/underlay/src/main/resources/config/indexer/aouSC2023Q3R2_verily.json b/underlay/src/main/resources/config/indexer/aouSC2023Q3R2_verily.json new file mode 100644 index 000000000..f7f920116 --- /dev/null +++ b/underlay/src/main/resources/config/indexer/aouSC2023Q3R2_verily.json @@ -0,0 +1,28 @@ +{ + "underlay": "aouSC2023Q3R2", + "bigQuery": { + "sourceData": { + "projectId": "verily-tanagra-dev", + "datasetId": "aou_test_data_SC2023Q3R2", + "sqlSubstitutions": { + "omopDataset": "verily-tanagra-dev.aou_test_data_SC2023Q3R2", + "staticTablesDataset": "verily-tanagra-dev.aou_static_prep" + } + }, + "indexData": { + "projectId": "verily-tanagra-dev", + "datasetId": "aou_test_data_SC2023Q3R2_index_090424", + "tablePrefix": "T" + }, + "queryProjectId": "verily-tanagra-dev", + "dataLocation": "us" + }, + "dataflow": { + "serviceAccountEmail": "backend-default@verily-tanagra-dev.iam.gserviceaccount.com", + "dataflowLocation": "us-central1", + "gcsTempDirectory": "gs://dataflow-staging-us-central1-694046000181/temp/", + "workerMachineType": "n1-standard-4", + "usePublicIps": false, + "vpcSubnetworkName": null + } +} diff --git a/underlay/src/main/resources/config/underlay/aouSC2023Q3R2/underlay.json b/underlay/src/main/resources/config/underlay/aouSC2023Q3R2/underlay.json index 70d3e4111..c8c9f4ff1 100644 --- a/underlay/src/main/resources/config/underlay/aouSC2023Q3R2/underlay.json +++ b/underlay/src/main/resources/config/underlay/aouSC2023Q3R2/underlay.json @@ -56,7 +56,9 @@ "aouRT/surveyPersonalAndFamilyHealth", "aouRT/surveyHealthCareAccess", "aouRT/surveySocialDeterminantsOfHealth", - "aouRT/surveyOccurrence" + "aouRT/surveyOccurrence", + + "aouCT/variant" ], "groupItemsEntityGroups": [ "aouRT/brandIngredientConcept", diff --git a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/BQFieldTest.java b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/BQFieldTest.java index fc3b0462a..12ba89b06 100644 --- a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/BQFieldTest.java +++ b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/BQFieldTest.java @@ -88,6 +88,7 @@ void attributeFieldAgainstSourceData() throws IOException { new Attribute( "ethnicityNoDisplayJoin", ethnicityAttribute.getDataType(), + ethnicityAttribute.isDataTypeRepeated(), ethnicityAttribute.isValueDisplay(), ethnicityAttribute.isId(), ethnicityAttribute.getRuntimeSqlFunctionWrapper(), @@ -110,6 +111,7 @@ void attributeFieldAgainstSourceData() throws IOException { new Attribute( "genderSuppressed", genderAttribute.getDataType(), + genderAttribute.isDataTypeRepeated(), genderAttribute.isValueDisplay(), genderAttribute.isId(), genderAttribute.getRuntimeSqlFunctionWrapper(), From eb7919228e2d0d111cb4f7d7e1af78e966ae8482 Mon Sep 17 00:00:00 2001 From: Mariko Medlock Date: Wed, 4 Sep 2024 15:14:49 -0400 Subject: [PATCH 02/15] CreateEntityMainn indexing job. --- .../tanagra/indexing/job/bigquery/CreateEntityMain.java | 4 ++++ .../bio/terra/tanagra/underlay/indextable/ITEntityMain.java | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/CreateEntityMain.java b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/CreateEntityMain.java index 642d5b167..90bfb1c51 100644 --- a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/CreateEntityMain.java +++ b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/CreateEntityMain.java @@ -47,6 +47,10 @@ public void run(boolean isDryRun) { Field.newBuilder( columnSchema.getColumnName(), BigQueryBeamUtils.fromDataType(columnSchema.getDataType())) + .setMode( + columnSchema.isDataTypeRepeated() + ? Field.Mode.REPEATED + : Field.Mode.NULLABLE) .build()) .collect(Collectors.toList()); diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/indextable/ITEntityMain.java b/underlay/src/main/java/bio/terra/tanagra/underlay/indextable/ITEntityMain.java index 6d9c3b5a4..bb4edef91 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/indextable/ITEntityMain.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/indextable/ITEntityMain.java @@ -38,7 +38,10 @@ public ITEntityMain( szAttribute -> { columnSchemasBuilder.add( new ColumnSchema( - szAttribute.name, ConfigReader.deserializeDataType(szAttribute.dataType))); + szAttribute.name, + ConfigReader.deserializeDataType(szAttribute.dataType), + szAttribute.isDataTypeRepeated, + false)); if (szAttribute.displayFieldName != null) { columnSchemasBuilder.add( new ColumnSchema( From 5d54178de8aa4a72f8e383abf909012762117fe2 Mon Sep 17 00:00:00 2001 From: Mariko Medlock Date: Thu, 5 Sep 2024 16:07:16 -0400 Subject: [PATCH 03/15] WriteEntityLevelDisplayHints indexing job. --- .../WriteEntityLevelDisplayHints.java | 104 +++++++++++++++++- 1 file changed, 98 insertions(+), 6 deletions(-) diff --git a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteEntityLevelDisplayHints.java b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteEntityLevelDisplayHints.java index 92ba8862a..8095012a0 100644 --- a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteEntityLevelDisplayHints.java +++ b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteEntityLevelDisplayHints.java @@ -113,8 +113,9 @@ public void run(boolean isDryRun) { attribute.getName(), minMax.getKey(), minMax.getValue()); - } else if (isEnumHint(attribute)) { - List> enumCounts = computeEnumHint(attribute, isDryRun); + } else if (isEnumHintForValueDisplay(attribute)) { + List> enumCounts = + computeEnumHintForValueDisplay(attribute, isDryRun); enumCounts.forEach( enumCount -> { List rowOfLiterals = new ArrayList<>(); @@ -126,12 +127,32 @@ public void run(boolean isDryRun) { rowOfLiterals.add(Literal.forInt64(enumCount.getValue())); insertRows.add(rowOfLiterals); LOGGER.info( - "Enum hint: {}, {}, {}, {}", + "Enum value-display hint: {}, {}, {}, {}", attribute.getName(), enumCount.getKey().getValue(), enumCount.getKey().getDisplay(), enumCount.getValue()); }); + } else if (isEnumHintForRepeatedStringValue(attribute)) { + List> enumCounts = + computeEnumHintForRepeatedStringValue(attribute, isDryRun); + enumCounts.forEach( + enumCount -> { + List rowOfLiterals = new ArrayList<>(); + rowOfLiterals.add(Literal.forString(attribute.getName())); + rowOfLiterals.add(Literal.forDouble(null)); + rowOfLiterals.add(Literal.forDouble(null)); + rowOfLiterals.add(Literal.forInt64(null)); + rowOfLiterals.add(enumCount.getKey()); + rowOfLiterals.add(Literal.forInt64(enumCount.getValue())); + insertRows.add(rowOfLiterals); + LOGGER.info( + "Enum repeated-string hint: {}, {}, {}, {}", + attribute.getName(), + null, + enumCount.getKey(), + enumCount.getValue()); + }); } else { LOGGER.info( "Attribute {} data type {} not yet supported for computing hint", @@ -205,16 +226,23 @@ public void run(boolean isDryRun) { } } - private static boolean isEnumHint(Attribute attribute) { + private static boolean isEnumHintForValueDisplay(Attribute attribute) { return attribute.isValueDisplay() && DataType.INT64.equals(attribute.getRuntimeDataType()); } + private static boolean isEnumHintForRepeatedStringValue(Attribute attribute) { + // TODO: Support not-repeated string simple attributes & repeated integer attributes. + return attribute.isSimple() + && attribute.isDataTypeRepeated() + && (DataType.STRING.equals(attribute.getDataType())); + } + private static boolean isRangeHint(Attribute attribute) { if (attribute.isValueDisplay()) { return false; } return switch (attribute.getRuntimeDataType()) { - case DOUBLE, INT64 -> true; + case DOUBLE, INT64 -> !attribute.isDataTypeRepeated(); default -> false; }; } @@ -275,7 +303,8 @@ private Pair computeRangeHint(Attribute attribute, boolean isD } } - private List> computeEnumHint(Attribute attribute, boolean isDryRun) { + private List> computeEnumHintForValueDisplay( + Attribute attribute, boolean isDryRun) { // Build the query. // SELECT attrVal AS enumVal, attrDisp AS enumDisp, COUNT(*) AS enumCount FROM indextable GROUP // BY enumVal, enumDisp @@ -347,4 +376,67 @@ private List> computeEnumHint(Attribute attribute, bool }); return enumCounts; } + + private List> computeEnumHintForRepeatedStringValue( + Attribute attribute, boolean isDryRun) { + // TODO: Consolidate the logic here with the ValueDisplay enum hint method. + // Build the query. + // SELECT flattenedAttrVal AS enumVal, COUNT(*) AS enumCount FROM indextable + // CROSS JOIN UNNEST(indextable.attrVal) AS flattenedAttrVal + // GROUP BY enumVal + SqlField attrValField = indexAttributesTable.getAttributeValueField(attribute.getName()); + final String enumValAlias = "enumVal"; + final String enumCountAlias = "enumCount"; + final String flattenedAttrValAlias = "flattenedAttrVal"; + SqlField flattenedAttrValField = SqlField.of(flattenedAttrValAlias); + + String selectEnumCountSql = + "SELECT " + + SqlQueryField.of(flattenedAttrValField, enumValAlias).renderForSelect() + + ", COUNT(*) AS " + + enumCountAlias + + " FROM " + + indexAttributesTable.getTablePointer().render() + + " CROSS JOIN UNNEST(" + + SqlQueryField.of(attrValField).renderForSelect() + + ") AS " + + flattenedAttrValAlias + + " GROUP BY " + + SqlQueryField.of(attrValField, enumValAlias).renderForGroupBy(null, true); + LOGGER.info("SQL enum count: {}", selectEnumCountSql); + + // Execute the query. + List> enumCounts = new ArrayList<>(); + if (isDryRun) { + if (getOutputTable().isEmpty()) { + LOGGER.info("Skipping query dry run because output table does not exist yet."); + } else { + googleBigQuery.dryRunQuery(selectEnumCountSql); + } + enumCounts.add(Pair.of(Literal.forString(""), 0L)); + } else { + TableResult tableResult = googleBigQuery.runQueryLongTimeout(selectEnumCountSql); + + // Parse the result rows. + for (FieldValueList rowResult : tableResult.getValues()) { + FieldValue enumValFieldValue = rowResult.get(enumValAlias); + Literal enumVal = + Literal.forString( + enumValFieldValue.isNull() ? null : enumValFieldValue.getStringValue()); + FieldValue enumCountFieldValue = rowResult.get(enumCountAlias); + long enumCount = enumCountFieldValue.getLongValue(); + enumCounts.add(Pair.of(enumVal, enumCount)); + + if (enumCounts.size() > MAX_ENUM_VALS_FOR_DISPLAY_HINT) { + // if there are more than the max number of values, then skip the display hint + LOGGER.info( + "Skipping enum values display hint because there are >{} possible values: {}", + MAX_ENUM_VALS_FOR_DISPLAY_HINT, + attribute.getName()); + return List.of(); + } + } + } + return enumCounts; + } } From d4b6921c2d92e56828c81f700eea736506e5fae8 Mon Sep 17 00:00:00 2001 From: Mariko Medlock Date: Thu, 5 Sep 2024 16:24:39 -0400 Subject: [PATCH 04/15] WriteTextSearchField indexing job. --- .../job/bigquery/WriteTextSearchField.java | 19 ++++++++++++++----- .../aouCT/entity/variant/entity.json | 3 +++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteTextSearchField.java b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteTextSearchField.java index ec14e6e6f..53ce03bcb 100644 --- a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteTextSearchField.java +++ b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteTextSearchField.java @@ -69,13 +69,22 @@ public void run(boolean isDryRun) { SqlField attributeTextField; if (attribute.isValueDisplay()) { attributeTextField = indexTable.getAttributeDisplayField(attribute.getName()); - } else if (!attribute.getDataType().equals(DataType.STRING)) { - attributeTextField = - indexTable - .getAttributeValueField(attribute.getName()) - .cloneWithFunctionWrapper("CAST(${fieldSql} AS STRING)"); } else { + String functionWrapper = null; + if (!attribute.getDataType().equals(DataType.STRING)) { + functionWrapper = "CAST(${fieldSql} AS STRING)"; + } + if (attribute.isDataTypeRepeated()) { + functionWrapper = + "ARRAY_TO_STRING(" + + (functionWrapper == null ? "${fieldSql}" : functionWrapper) + + ", \" \")"; + } + attributeTextField = indexTable.getAttributeValueField(attribute.getName()); + if (functionWrapper != null) { + attributeTextField = attributeTextField.cloneWithFunctionWrapper(functionWrapper); + } } String idTextSql = diff --git a/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json b/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json index 6035eb383..bfa47a358 100644 --- a/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json +++ b/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json @@ -13,5 +13,8 @@ { "name": "allele_frequency", "dataType": "DOUBLE", "valueFieldName": "gvs_all_af", "isComputeDisplayHint": true } ], "idAttribute": "id", + "textSearch": { + "attributes": [ "gene", "clinvar_significance" ] + }, "optimizeGroupByAttributes": [ "gender", "race", "age", "ethnicity" ] } From 519f3cd1ec39f3ca7e1a41e11430fd5313965596 Mon Sep 17 00:00:00 2001 From: Mariko Medlock Date: Fri, 6 Sep 2024 10:58:08 -0400 Subject: [PATCH 05/15] Select repeated attr field. Result parsing test list query. --- .../tanagra/api/shared/ValueDisplay.java | 26 ++++++++++++-- .../tanagra/query/bigquery/BQRowResult.java | 17 +++++++--- .../field/BQAttributeFieldTranslator.java | 21 ++++++++---- .../terra/tanagra/query/sql/SqlRowResult.java | 4 +++ .../resultparsing/BQListQueryResultsTest.java | 34 +++++++++++++++++-- 5 files changed, 85 insertions(+), 17 deletions(-) diff --git a/underlay/src/main/java/bio/terra/tanagra/api/shared/ValueDisplay.java b/underlay/src/main/java/bio/terra/tanagra/api/shared/ValueDisplay.java index 8e99e5b4a..f637ae437 100644 --- a/underlay/src/main/java/bio/terra/tanagra/api/shared/ValueDisplay.java +++ b/underlay/src/main/java/bio/terra/tanagra/api/shared/ValueDisplay.java @@ -1,19 +1,29 @@ package bio.terra.tanagra.api.shared; -import java.util.Objects; +import com.google.common.collect.*; +import java.util.*; public class ValueDisplay { private final Literal value; private final String display; + private final ImmutableList repeatedValue; public ValueDisplay(Literal value) { this.value = value; this.display = null; + this.repeatedValue = null; } public ValueDisplay(Literal value, String display) { this.value = value; this.display = display; + this.repeatedValue = null; + } + + public ValueDisplay(List repeatedValue) { + this.value = null; + this.display = null; + this.repeatedValue = ImmutableList.copyOf(repeatedValue); } public Literal getValue() { @@ -24,6 +34,14 @@ public String getDisplay() { return display; } + public List getRepeatedValue() { + return repeatedValue; + } + + public boolean isRepeatedValue() { + return repeatedValue != null; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -33,11 +51,13 @@ public boolean equals(Object o) { return false; } ValueDisplay that = (ValueDisplay) o; - return value.equals(that.value) && Objects.equals(display, that.display); + return Objects.equals(value, that.value) + && Objects.equals(display, that.display) + && Objects.equals(repeatedValue, that.repeatedValue); } @Override public int hashCode() { - return Objects.hash(value, display); + return Objects.hash(value, display, repeatedValue); } } diff --git a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQRowResult.java b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQRowResult.java index 5223d1e23..eb76ea3e8 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQRowResult.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQRowResult.java @@ -2,11 +2,11 @@ import bio.terra.tanagra.api.shared.DataType; import bio.terra.tanagra.api.shared.Literal; -import bio.terra.tanagra.exception.InvalidQueryException; import bio.terra.tanagra.query.sql.SqlRowResult; import com.google.cloud.bigquery.FieldValue; import com.google.cloud.bigquery.FieldValueList; import java.sql.Timestamp; +import java.util.*; public class BQRowResult implements SqlRowResult { private final FieldValueList fieldValues; @@ -18,6 +18,18 @@ public BQRowResult(FieldValueList fieldValues) { @Override public Literal get(String columnName, DataType expectedDataType) { FieldValue fieldValue = fieldValues.get(columnName); + return toLiteral(fieldValue, expectedDataType); + } + + @Override + public List getRepeated(String columnName, DataType expectedDataType) { + List repeatedFieldValue = fieldValues.get(columnName).getRepeatedValue(); + return repeatedFieldValue.stream() + .map(fieldValue -> toLiteral(fieldValue, expectedDataType)) + .toList(); + } + + private static Literal toLiteral(FieldValue fieldValue, DataType expectedDataType) { return switch (expectedDataType) { case STRING -> Literal.forString(fieldValue.isNull() ? null : fieldValue.getStringValue()); case INT64 -> Literal.forInt64(fieldValue.isNull() ? null : fieldValue.getLongValue()); @@ -27,9 +39,6 @@ public Literal get(String columnName, DataType expectedDataType) { case TIMESTAMP -> Literal.forTimestamp( fieldValue.isNull() ? null : Timestamp.from(fieldValue.getTimestampInstant())); - default -> - throw new InvalidQueryException( - "Unsupported data type for BigQuery row result: " + expectedDataType); }; } diff --git a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/field/BQAttributeFieldTranslator.java b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/field/BQAttributeFieldTranslator.java index a6727ab7b..2a7198ade 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/field/BQAttributeFieldTranslator.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/field/BQAttributeFieldTranslator.java @@ -106,14 +106,21 @@ private String getDisplayFieldAlias() { @Override public ValueDisplay parseValueDisplayFromResult(SqlRowResult sqlRowResult) { - Literal valueField = - sqlRowResult.get(getValueFieldAlias(), attributeField.getAttribute().getRuntimeDataType()); - - if (attributeField.getAttribute().isSimple() || attributeField.isExcludeDisplay()) { - return new ValueDisplay(valueField); + if (attributeField.getAttribute().isDataTypeRepeated()) { + List repeatedValueField = + sqlRowResult.getRepeated( + getValueFieldAlias(), attributeField.getAttribute().getRuntimeDataType()); + return new ValueDisplay(repeatedValueField); } else { - Literal displayField = sqlRowResult.get(getDisplayFieldAlias(), DataType.STRING); - return new ValueDisplay(valueField, displayField.getStringVal()); + Literal valueField = + sqlRowResult.get( + getValueFieldAlias(), attributeField.getAttribute().getRuntimeDataType()); + if (attributeField.getAttribute().isSimple() || attributeField.isExcludeDisplay()) { + return new ValueDisplay(valueField); + } else { + Literal displayField = sqlRowResult.get(getDisplayFieldAlias(), DataType.STRING); + return new ValueDisplay(valueField, displayField.getStringVal()); + } } } diff --git a/underlay/src/main/java/bio/terra/tanagra/query/sql/SqlRowResult.java b/underlay/src/main/java/bio/terra/tanagra/query/sql/SqlRowResult.java index 35dd392b2..a9c124e6f 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/sql/SqlRowResult.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/sql/SqlRowResult.java @@ -2,11 +2,15 @@ import bio.terra.tanagra.api.shared.DataType; import bio.terra.tanagra.api.shared.Literal; +import java.util.*; public interface SqlRowResult { /** Get literal value for the column in this row. */ Literal get(String columnName, DataType expectedDataType); + /** Get literal values for the repeated column in this row. */ + List getRepeated(String columnName, DataType expectedDataType); + /** Return the number of {@link bio.terra.tanagra.api.shared.Literal}s in this row. */ int size(); } diff --git a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQListQueryResultsTest.java b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQListQueryResultsTest.java index a49b528c3..7d49fe244 100644 --- a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQListQueryResultsTest.java +++ b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQListQueryResultsTest.java @@ -20,8 +20,7 @@ import bio.terra.tanagra.api.shared.OrderByDirection; import bio.terra.tanagra.api.shared.ValueDisplay; import bio.terra.tanagra.query.bigquery.BQRunnerTest; -import bio.terra.tanagra.underlay.entitymodel.Entity; -import bio.terra.tanagra.underlay.entitymodel.Hierarchy; +import bio.terra.tanagra.underlay.entitymodel.*; import bio.terra.tanagra.underlay.entitymodel.entitygroup.EntityGroup; import java.util.List; import org.junit.jupiter.api.Test; @@ -41,12 +40,34 @@ void attributeField() { AttributeField idAttribute = new AttributeField(underlay, entity, entity.getIdAttribute(), false); + // We don't have an example of an attribute with a repeated data type, yet. + // So create an artificial attribute just for this test. + AttributeField repeatedStringAttribute = + new AttributeField( + underlay, + entity, + new Attribute( + "person_source_value", + DataType.STRING, + true, + false, + false, + "['foo', 'bar', 'baz', ${fieldSql}]", + DataType.STRING, + entity.getIdAttribute().isComputeDisplayHint(), + entity.getIdAttribute().isSuppressedForExport(), + entity.getIdAttribute().isVisitDateForTemporalQuery(), + entity.getIdAttribute().isVisitIdForTemporalQuery(), + entity.getIdAttribute().getSourceQuery()), + false); + List selectAttributes = List.of( simpleAttribute, valueDisplayAttribute, valueDisplayAttributeWithoutDisplay, - runtimeCalculatedAttribute); + runtimeCalculatedAttribute, + repeatedStringAttribute); List orderBys = List.of(new OrderBy(idAttribute, OrderByDirection.DESCENDING)); int limit = 5; ListQueryResult listQueryResult = @@ -92,6 +113,13 @@ void attributeField() { age.getValue().isNull() || DataType.INT64.equals(age.getValue().getDataType())); assertNotNull(age.getValue().getInt64Val()); assertNull(age.getDisplay()); + + ValueDisplay repeatedString = + listInstance.getEntityFieldValue(repeatedStringAttribute); + assertNotNull(repeatedString); + assertTrue(repeatedString.isRepeatedValue()); + assertNotNull(repeatedString.getRepeatedValue()); + assertEquals(4, repeatedString.getRepeatedValue().size()); }); } From c209b0fab4cac57c3932ff45273e9c17607d3517 Mon Sep 17 00:00:00 2001 From: Mariko Medlock Date: Mon, 9 Sep 2024 09:56:21 -0400 Subject: [PATCH 06/15] Fetch string-only hints. Result parsing test hint query. --- .../WriteEntityLevelDisplayHints.java | 94 ++++++++++++------- .../tanagra/query/bigquery/BQQueryRunner.java | 10 +- .../resultparsing/BQHintQueryResultsTest.java | 14 ++- 3 files changed, 79 insertions(+), 39 deletions(-) diff --git a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteEntityLevelDisplayHints.java b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteEntityLevelDisplayHints.java index 8095012a0..588c63f8f 100644 --- a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteEntityLevelDisplayHints.java +++ b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteEntityLevelDisplayHints.java @@ -118,19 +118,28 @@ public void run(boolean isDryRun) { computeEnumHintForValueDisplay(attribute, isDryRun); enumCounts.forEach( enumCount -> { + Literal int64Field = + attribute.isValueDisplay() + ? enumCount.getKey().getValue() + : Literal.forInt64(null); + Literal stringField = + attribute.isValueDisplay() + ? Literal.forString(enumCount.getKey().getDisplay()) + : enumCount.getKey().getValue(); + List rowOfLiterals = new ArrayList<>(); rowOfLiterals.add(Literal.forString(attribute.getName())); rowOfLiterals.add(Literal.forDouble(null)); rowOfLiterals.add(Literal.forDouble(null)); - rowOfLiterals.add(enumCount.getKey().getValue()); - rowOfLiterals.add(Literal.forString(enumCount.getKey().getDisplay())); + rowOfLiterals.add(int64Field); + rowOfLiterals.add(stringField); rowOfLiterals.add(Literal.forInt64(enumCount.getValue())); insertRows.add(rowOfLiterals); LOGGER.info( - "Enum value-display hint: {}, {}, {}, {}", + "Enum value-display or simple-string hint: {}, {}, {}, {}", attribute.getName(), - enumCount.getKey().getValue(), - enumCount.getKey().getDisplay(), + int64Field, + stringField, enumCount.getValue()); }); } else if (isEnumHintForRepeatedStringValue(attribute)) { @@ -227,7 +236,8 @@ public void run(boolean isDryRun) { } private static boolean isEnumHintForValueDisplay(Attribute attribute) { - return attribute.isValueDisplay() && DataType.INT64.equals(attribute.getRuntimeDataType()); + return (attribute.isValueDisplay() && DataType.INT64.equals(attribute.getRuntimeDataType())) + || (attribute.isSimple() && DataType.STRING.equals(attribute.getRuntimeDataType())); } private static boolean isEnumHintForRepeatedStringValue(Attribute attribute) { @@ -306,27 +316,33 @@ private Pair computeRangeHint(Attribute attribute, boolean isD private List> computeEnumHintForValueDisplay( Attribute attribute, boolean isDryRun) { // Build the query. - // SELECT attrVal AS enumVal, attrDisp AS enumDisp, COUNT(*) AS enumCount FROM indextable GROUP - // BY enumVal, enumDisp + // SELECT attrVal AS enumVal[, attrDisp AS enumDisp], COUNT(*) AS enumCount FROM indextable + // GROUP BY enumVal[, enumDisp] SqlField attrValField = indexAttributesTable.getAttributeValueField(attribute.getName()); - SqlField attrDispField = indexAttributesTable.getAttributeDisplayField(attribute.getName()); + SqlField attrDispField = + attribute.isValueDisplay() + ? indexAttributesTable.getAttributeDisplayField(attribute.getName()) + : null; final String enumValAlias = "enumVal"; final String enumDispAlias = "enumDisp"; final String enumCountAlias = "enumCount"; String selectEnumCountSql = - "SELECT " - + SqlQueryField.of(attrValField, enumValAlias).renderForSelect() - + ", " - + SqlQueryField.of(attrDispField, enumDispAlias).renderForSelect() - + ", COUNT(*) AS " + "SELECT " + SqlQueryField.of(attrValField, enumValAlias).renderForSelect(); + if (attribute.isValueDisplay()) { + selectEnumCountSql += ", " + SqlQueryField.of(attrDispField, enumDispAlias).renderForSelect(); + } + selectEnumCountSql += + ", COUNT(*) AS " + enumCountAlias + " FROM " + indexAttributesTable.getTablePointer().render() + " GROUP BY " - + SqlQueryField.of(attrValField, enumValAlias).renderForGroupBy(null, true) - + ", " - + SqlQueryField.of(attrDispField, enumDispAlias).renderForGroupBy(null, true); + + SqlQueryField.of(attrValField, enumValAlias).renderForGroupBy(null, true); + if (attribute.isValueDisplay()) { + selectEnumCountSql += + ", " + SqlQueryField.of(attrDispField, enumDispAlias).renderForGroupBy(null, true); + } LOGGER.info("SQL enum count: {}", selectEnumCountSql); // Execute the query. @@ -344,16 +360,26 @@ private List> computeEnumHintForValueDisplay( // Parse the result rows. for (FieldValueList rowResult : tableResult.getValues()) { FieldValue enumValFieldValue = rowResult.get(enumValAlias); - Literal enumVal = - Literal.forInt64(enumValFieldValue.isNull() ? null : enumValFieldValue.getLongValue()); - FieldValue enumDispFieldValue = rowResult.get(enumDispAlias); - String enumDisp = enumDispFieldValue.isNull() ? null : enumDispFieldValue.getStringValue(); + Literal enumVal; + String enumDisp; + if (attribute.isValueDisplay()) { + enumVal = + Literal.forInt64( + enumValFieldValue.isNull() ? null : enumValFieldValue.getLongValue()); + FieldValue enumDispFieldValue = rowResult.get(enumDispAlias); + enumDisp = enumDispFieldValue.isNull() ? null : enumDispFieldValue.getStringValue(); + } else { + enumVal = + Literal.forString( + enumValFieldValue.isNull() ? null : enumValFieldValue.getStringValue()); + enumDisp = null; + } FieldValue enumCountFieldValue = rowResult.get(enumCountAlias); long enumCount = enumCountFieldValue.getLongValue(); enumCounts.add(Pair.of(new ValueDisplay(enumVal, enumDisp), enumCount)); if (enumCounts.size() > MAX_ENUM_VALS_FOR_DISPLAY_HINT) { - // if there are more than the max number of values, then skip the display hint + // If there are more than the max number of values, then skip the display hint. LOGGER.info( "Skipping enum values display hint because there are >{} possible values: {}", MAX_ENUM_VALS_FOR_DISPLAY_HINT, @@ -363,17 +389,19 @@ private List> computeEnumHintForValueDisplay( } } - // Check that there is exactly one display per value. - Map valDisplay = new HashMap<>(); - enumCounts.forEach( - enumCount -> { - if (valDisplay.containsKey(enumCount.getKey().getValue())) { - throw new InvalidConfigException( - "Found >1 possible display for the enum value " + enumCount.getKey().getValue()); - } else { - valDisplay.put(enumCount.getKey().getValue(), enumCount.getKey().getDisplay()); - } - }); + if (attribute.isValueDisplay()) { + // Check that there is exactly one display per value. + Map valDisplay = new HashMap<>(); + enumCounts.forEach( + enumCount -> { + if (valDisplay.containsKey(enumCount.getKey().getValue())) { + throw new InvalidConfigException( + "Found >1 possible display for the enum value " + enumCount.getKey().getValue()); + } else { + valDisplay.put(enumCount.getKey().getValue(), enumCount.getKey().getDisplay()); + } + }); + } return enumCounts; } diff --git a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java index 887142c85..06d13c30e 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java @@ -271,8 +271,7 @@ public HintQueryResult run(HintQueryRequest hintQueryRequest) { .getHintedEntity() .getAttribute( sqlRowResult.get(attributeColName, DataType.STRING).getStringVal()); - if (attribute.isValueDisplay() - || attribute.getRuntimeDataType().equals(DataType.STRING)) { + if (attribute.isValueDisplay()) { // This is one (value,count) pair of an enum values hint. Literal enumVal = sqlRowResult.get(enumValColName, DataType.INT64); String enumDisplay = @@ -282,6 +281,13 @@ public HintQueryResult run(HintQueryRequest hintQueryRequest) { enumValues.containsKey(attribute) ? enumValues.get(attribute) : new HashMap<>(); enumValuesForAttr.put(new ValueDisplay(enumVal, enumDisplay), enumCount); enumValues.put(attribute, enumValuesForAttr); + } else if (attribute.getRuntimeDataType().equals(DataType.STRING)) { + Literal enumVal = sqlRowResult.get(enumValColName, DataType.STRING); + Long enumCount = sqlRowResult.get(enumCountColName, DataType.INT64).getInt64Val(); + Map enumValuesForAttr = + enumValues.containsKey(attribute) ? enumValues.get(attribute) : new HashMap<>(); + enumValuesForAttr.put(new ValueDisplay(enumVal), enumCount); + enumValues.put(attribute, enumValuesForAttr); } else { // This is a range hint. Double min = sqlRowResult.get(minColName, DataType.DOUBLE).getDoubleVal(); diff --git a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQHintQueryResultsTest.java b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQHintQueryResultsTest.java index b557f9e2a..46243cc70 100644 --- a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQHintQueryResultsTest.java +++ b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQHintQueryResultsTest.java @@ -21,8 +21,12 @@ protected String getServiceConfigName() { } @Test - void entityLevelHint() { - Entity hintedEntity = underlay.getPrimaryEntity(); + void entityLevelHints() { + checkEntityLevelHints(underlay.getPrimaryEntity()); + checkEntityLevelHints(underlay.getEntity("brand")); + } + + private void checkEntityLevelHints(Entity hintedEntity) { HintQueryResult hintQueryResult = bqQueryRunner.run(new HintQueryRequest(underlay, hintedEntity, null, null, null, false)); @@ -43,8 +47,10 @@ void entityLevelHint() { assertTrue(hintInstance.getMin() <= hintInstance.getMax()); } else { // isEnumHint assertTrue( - attribute.isValueDisplay() - || attribute.getRuntimeDataType().equals(DataType.STRING)); + (attribute.isValueDisplay() + && DataType.INT64.equals(attribute.getRuntimeDataType())) + || (attribute.isSimple() + && DataType.STRING.equals(attribute.getRuntimeDataType()))); assertFalse(hintInstance.getEnumValueCounts().isEmpty()); hintInstance .getEnumValueCounts() From 0714da8407f231deb3f44be57fda145c1646ff79 Mon Sep 17 00:00:00 2001 From: Mariko Medlock Date: Mon, 9 Sep 2024 11:58:49 -0400 Subject: [PATCH 07/15] Group by repeated attribute field. Sql building and results parsing tests count query. --- .../tanagra/query/bigquery/BQQueryRunner.java | 19 ++++- .../field/BQAttributeFieldTranslator.java | 49 ++++++++---- .../BQCountQueryResultsTest.java | 75 ++++++++++++++++++- .../sqlbuilding/BQCountQueryTest.java | 60 ++++++++++++++- .../groupByRepeatedAttribute.sql | 12 +++ 5 files changed, 192 insertions(+), 23 deletions(-) create mode 100644 underlay/src/test/resources/sql/BQCountQueryTest/groupByRepeatedAttribute.sql diff --git a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java index 06d13c30e..393e190ff 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java @@ -389,15 +389,21 @@ private SqlQueryRequest buildQuerySqlAgainstIndexData( ITEntityMain entityMain = underlay.getIndexSchema().getEntityMain(singleEntity.getName()); List selectFieldSqls = new ArrayList<>(); + List joinTableSqls = new ArrayList<>(); selectFields.forEach( valueDisplayField -> { List sqlQueryFields; if (groupByFields.contains(valueDisplayField) && valueDisplayField instanceof AttributeField) { + BQAttributeFieldTranslator attributeFieldTranslator = + (BQAttributeFieldTranslator) bqTranslator.translator(valueDisplayField); sqlQueryFields = - ((BQAttributeFieldTranslator) bqTranslator.translator(valueDisplayField)) - .buildSqlFieldsForCountSelectAndGroupBy( - entityLevelHints.get(valueDisplayField.getEntity())); + attributeFieldTranslator.buildSqlFieldsForCountSelectAndGroupBy( + entityLevelHints.get(valueDisplayField.getEntity())); + String joinTableSql = attributeFieldTranslator.buildSqlJoinTableForCountQuery(); + if (joinTableSql != null) { + joinTableSqls.add(joinTableSql); + } } else { sqlQueryFields = bqTranslator.translator(valueDisplayField).buildSqlFieldsForListSelect(); @@ -407,12 +413,17 @@ private SqlQueryRequest buildQuerySqlAgainstIndexData( sqlQueryField -> selectFieldSqls.add(sqlQueryField.renderForSelect())); }); - // SELECT [select fields] FROM [entity main] + // SELECT [select fields] FROM [entity main] JOIN [join tables] sql.append("SELECT ") .append(String.join(", ", selectFieldSqls)) .append(" FROM ") .append(entityMain.getTablePointer().render()); + // JOIN [join tables] + if (!joinTableSqls.isEmpty()) { + sql.append(" ").append(String.join(" ", joinTableSqls)); + } + // WHERE [filter] EntityFilter singleEntityFilter = filters.get(singleEntity); if (singleEntityFilter != null) { diff --git a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/field/BQAttributeFieldTranslator.java b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/field/BQAttributeFieldTranslator.java index 2a7198ade..759c513a9 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/field/BQAttributeFieldTranslator.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/field/BQAttributeFieldTranslator.java @@ -12,6 +12,7 @@ import bio.terra.tanagra.query.sql.translator.ApiFieldTranslator; import bio.terra.tanagra.underlay.entitymodel.Attribute; import bio.terra.tanagra.underlay.indextable.ITEntityMain; +import jakarta.annotation.*; import java.util.ArrayList; import java.util.List; import java.util.Optional; @@ -34,7 +35,7 @@ public BQAttributeFieldTranslator(AttributeField attributeField) { @Override public List buildSqlFieldsForListSelect() { - return buildSqlFields(true, true); + return buildSqlFields(true, true, false); } public List buildSqlFieldsForCountSelectAndGroupBy( @@ -42,21 +43,39 @@ public List buildSqlFieldsForCountSelectAndGroupBy( return buildSqlFields( true, entityLevelHints == null - || entityLevelHints.getHintInstance(attributeField.getAttribute()).isEmpty()); + || entityLevelHints.getHintInstance(attributeField.getAttribute()).isEmpty(), + true); + } + + public @Nullable String buildSqlJoinTableForCountQuery() { + Attribute attribute = attributeField.getAttribute(); + if (!attribute.isDataTypeRepeated() || attributeField.isAgainstSourceDataset()) { + return null; + } + + SqlField valueField = indexTable.getAttributeValueField(attribute.getName()); + if (attribute.hasRuntimeSqlFunctionWrapper()) { + valueField = valueField.cloneWithFunctionWrapper(attribute.getRuntimeSqlFunctionWrapper()); + } + SqlQueryField valueSqlQueryField = SqlQueryField.of(valueField); + return "CROSS JOIN UNNEST(" + + valueSqlQueryField.renderForSelect() + + ") AS " + + getValueFieldAlias(true); } @Override public List buildSqlFieldsForOrderBy() { - return buildSqlFields(false, true); + return buildSqlFields(false, true, false); } @Override public List buildSqlFieldsForGroupBy() { - return buildSqlFields(true, false); + return buildSqlFields(true, false, true); } private List buildSqlFields( - boolean includeValueField, boolean includeDisplayField) { + boolean includeValueField, boolean includeDisplayField, boolean flattenRepeatedValues) { Attribute attribute = attributeField.getAttribute(); SqlQueryField valueSqlQueryField; @@ -65,12 +84,15 @@ private List buildSqlFields( SqlField valueField = SqlField.of(attribute.getSourceQuery().getValueFieldName()); valueSqlQueryField = SqlQueryField.of(valueField); hasDisplayField = attribute.getSourceQuery().hasDisplayField(); + } else if (attribute.isDataTypeRepeated() && flattenRepeatedValues) { + valueSqlQueryField = SqlQueryField.of(SqlField.of(getValueFieldAlias(true))); + hasDisplayField = false; } else { SqlField valueField = indexTable.getAttributeValueField(attribute.getName()); if (attribute.hasRuntimeSqlFunctionWrapper()) { valueField = valueField.cloneWithFunctionWrapper(attribute.getRuntimeSqlFunctionWrapper()); } - valueSqlQueryField = SqlQueryField.of(valueField, getValueFieldAlias()); + valueSqlQueryField = SqlQueryField.of(valueField, getValueFieldAlias(false)); hasDisplayField = !attribute.isSimple(); } if (!hasDisplayField || attributeField.isExcludeDisplay()) { @@ -92,10 +114,10 @@ private List buildSqlFields( return sqlQueryFields; } - private String getValueFieldAlias() { - return indexTable - .getAttributeValueField(attributeField.getAttribute().getName()) - .getColumnName(); + private String getValueFieldAlias(boolean flattenRepeatedValues) { + String alias = + indexTable.getAttributeValueField(attributeField.getAttribute().getName()).getColumnName(); + return flattenRepeatedValues ? ("FLATTENED_" + alias) : alias; } private String getDisplayFieldAlias() { @@ -109,12 +131,12 @@ public ValueDisplay parseValueDisplayFromResult(SqlRowResult sqlRowResult) { if (attributeField.getAttribute().isDataTypeRepeated()) { List repeatedValueField = sqlRowResult.getRepeated( - getValueFieldAlias(), attributeField.getAttribute().getRuntimeDataType()); + getValueFieldAlias(false), attributeField.getAttribute().getRuntimeDataType()); return new ValueDisplay(repeatedValueField); } else { Literal valueField = sqlRowResult.get( - getValueFieldAlias(), attributeField.getAttribute().getRuntimeDataType()); + getValueFieldAlias(false), attributeField.getAttribute().getRuntimeDataType()); if (attributeField.getAttribute().isSimple() || attributeField.isExcludeDisplay()) { return new ValueDisplay(valueField); } else { @@ -127,7 +149,8 @@ public ValueDisplay parseValueDisplayFromResult(SqlRowResult sqlRowResult) { public ValueDisplay parseValueDisplayFromCountResult( SqlRowResult sqlRowResult, HintQueryResult entityLevelHints) { Literal valueField = - sqlRowResult.get(getValueFieldAlias(), attributeField.getAttribute().getRuntimeDataType()); + sqlRowResult.get( + getValueFieldAlias(true), attributeField.getAttribute().getRuntimeDataType()); if (attributeField.getAttribute().isSimple() || attributeField.isExcludeDisplay()) { if (attributeField.isExcludeDisplay()) { diff --git a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQCountQueryResultsTest.java b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQCountQueryResultsTest.java index 286eaf8ae..aec83d463 100644 --- a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQCountQueryResultsTest.java +++ b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQCountQueryResultsTest.java @@ -22,8 +22,7 @@ import bio.terra.tanagra.api.shared.OrderByDirection; import bio.terra.tanagra.api.shared.ValueDisplay; import bio.terra.tanagra.query.bigquery.BQRunnerTest; -import bio.terra.tanagra.underlay.entitymodel.Entity; -import bio.terra.tanagra.underlay.entitymodel.Hierarchy; +import bio.terra.tanagra.underlay.entitymodel.*; import bio.terra.tanagra.underlay.entitymodel.entitygroup.EntityGroup; import java.util.List; import java.util.Map; @@ -115,6 +114,78 @@ void attributeField() { }); } + @Test + void repeatedAttributeField() { + Entity entity = underlay.getEntity("condition"); + + // We don't have an example of an attribute with a repeated data type, yet. + // So create an artificial attribute just for this test. + AttributeField repeatedStringAttribute = + new AttributeField( + underlay, + entity, + new Attribute( + "vocabulary", + DataType.STRING, + true, + false, + false, + "['foo', 'bar', 'baz', ${fieldSql}]", + DataType.STRING, + entity.getAttribute("vocabulary").isComputeDisplayHint(), + entity.getAttribute("vocabulary").isSuppressedForExport(), + entity.getAttribute("vocabulary").isVisitDateForTemporalQuery(), + entity.getAttribute("vocabulary").isVisitIdForTemporalQuery(), + entity.getAttribute("vocabulary").getSourceQuery()), + false); + + List groupBys = List.of(repeatedStringAttribute); + HintQueryResult entityLevelHints = + new HintQueryResult( + "", + List.of( + new HintInstance( + entity.getAttribute("vocabulary"), + Map.of( + new ValueDisplay(Literal.forString("foo")), + 25L, + new ValueDisplay(Literal.forString("bar")), + 140L, + new ValueDisplay(Literal.forString("baz")), + 85L)))); + CountQueryResult countQueryResult = + bqQueryRunner.run( + new CountQueryRequest( + underlay, + entity, + null, + groupBys, + null, + OrderByDirection.DESCENDING, + null, + null, + null, + entityLevelHints, + false)); + + // Make sure we got some results back. + assertFalse(countQueryResult.getCountInstances().isEmpty()); + + // Check each of the group by fields. + countQueryResult + .getCountInstances() + .forEach( + countInstance -> { + ValueDisplay vocabulary = countInstance.getEntityFieldValue(repeatedStringAttribute); + assertNotNull(vocabulary); + assertTrue( + vocabulary.getValue().isNull() + || DataType.STRING.equals(vocabulary.getValue().getDataType())); + assertNotNull(vocabulary.getValue().getStringVal()); + assertNull(vocabulary.getDisplay()); + }); + } + @Test void hierarchyFields() { Entity entity = underlay.getEntity("condition"); diff --git a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/BQCountQueryTest.java b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/BQCountQueryTest.java index 2c30166ab..88ed46ece 100644 --- a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/BQCountQueryTest.java +++ b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/BQCountQueryTest.java @@ -6,10 +6,7 @@ import bio.terra.tanagra.api.query.count.CountQueryResult; import bio.terra.tanagra.api.query.hint.HintInstance; import bio.terra.tanagra.api.query.hint.HintQueryResult; -import bio.terra.tanagra.api.shared.BinaryOperator; -import bio.terra.tanagra.api.shared.Literal; -import bio.terra.tanagra.api.shared.OrderByDirection; -import bio.terra.tanagra.api.shared.ValueDisplay; +import bio.terra.tanagra.api.shared.*; import bio.terra.tanagra.query.bigquery.BQRunnerTest; import bio.terra.tanagra.query.bigquery.BQTable; import bio.terra.tanagra.underlay.entitymodel.Attribute; @@ -156,6 +153,61 @@ void groupByValueDisplayAttribute() throws IOException { "groupByValueDisplayField", countQueryResult.getSql(), entityMainTable); } + @Test + void groupByRepeatedAttribute() throws IOException { + Entity entity = underlay.getEntity("condition"); + + // We don't have an example of an attribute with a repeated data type, yet. + // So create an artificial attribute just for this test. + Attribute groupByAttribute = + new Attribute( + "vocabulary", + DataType.STRING, + true, + false, + false, + "['foo', 'bar', 'baz', ${fieldSql}]", + DataType.STRING, + entity.getAttribute("vocabulary").isComputeDisplayHint(), + entity.getAttribute("vocabulary").isSuppressedForExport(), + entity.getAttribute("vocabulary").isVisitDateForTemporalQuery(), + entity.getAttribute("vocabulary").isVisitIdForTemporalQuery(), + entity.getAttribute("vocabulary").getSourceQuery()); + AttributeField groupByAttributeField = + new AttributeField(underlay, entity, groupByAttribute, false); + HintQueryResult hintQueryResult = + new HintQueryResult( + "", + List.of( + new HintInstance( + groupByAttribute, + Map.of( + new ValueDisplay(Literal.forString("foo")), + 25L, + new ValueDisplay(Literal.forString("bar")), + 140L, + new ValueDisplay(Literal.forString("baz")), + 85L)))); + CountQueryResult countQueryResult = + bqQueryRunner.run( + new CountQueryRequest( + underlay, + entity, + null, + List.of(groupByAttributeField), + null, + OrderByDirection.DESCENDING, + null, + null, + null, + hintQueryResult, + true)); + BQTable entityMainTable = + underlay.getIndexSchema().getEntityMain(entity.getName()).getTablePointer(); + assertSqlMatchesWithTableNameOnly( + "groupByRepeatedAttribute", countQueryResult.getSql(), entityMainTable); + } + @Test void countDistinctAttributeNotId() throws IOException { Entity entity = underlay.getEntity("conditionOccurrence"); diff --git a/underlay/src/test/resources/sql/BQCountQueryTest/groupByRepeatedAttribute.sql b/underlay/src/test/resources/sql/BQCountQueryTest/groupByRepeatedAttribute.sql new file mode 100644 index 000000000..b28a02293 --- /dev/null +++ b/underlay/src/test/resources/sql/BQCountQueryTest/groupByRepeatedAttribute.sql @@ -0,0 +1,12 @@ + + SELECT + COUNT(id) AS T_CTDT, + FLATTENED_vocabulary + FROM + ${ENT_condition} + CROSS JOIN + UNNEST(['foo', 'bar', 'baz', vocabulary]) AS FLATTENED_vocabulary + GROUP BY + FLATTENED_vocabulary + ORDER BY + T_CTDT DESC From dd3f5276d82b8523776d8bf0453a9471a2da96ae Mon Sep 17 00:00:00 2001 From: Mariko Medlock Date: Mon, 9 Sep 2024 14:02:39 -0400 Subject: [PATCH 08/15] Filter on repeated attribute field. Sql building test list query. --- .../tanagra/api/filter/AttributeFilter.java | 16 +++++ .../bigquery/translator/BQApiTranslator.java | 23 ++++++++ .../filter/BQAttributeFilterTranslator.java | 29 ++++++++- .../query/sql/translator/ApiTranslator.java | 7 +++ .../filter/BQAttributeFilterTest.java | 59 +++++++++++++++++-- .../repeatedAttributeFilterBinary.sql | 12 ++++ .../repeatedAttributeFilterNaryIn.sql | 12 ++++ 7 files changed, 153 insertions(+), 5 deletions(-) create mode 100644 underlay/src/test/resources/sql/BQAttributeFilterTest/repeatedAttributeFilterBinary.sql create mode 100644 underlay/src/test/resources/sql/BQAttributeFilterTest/repeatedAttributeFilterNaryIn.sql diff --git a/underlay/src/main/java/bio/terra/tanagra/api/filter/AttributeFilter.java b/underlay/src/main/java/bio/terra/tanagra/api/filter/AttributeFilter.java index ec5975772..134b6e08b 100644 --- a/underlay/src/main/java/bio/terra/tanagra/api/filter/AttributeFilter.java +++ b/underlay/src/main/java/bio/terra/tanagra/api/filter/AttributeFilter.java @@ -98,6 +98,22 @@ public boolean hasBinaryOperator() { return binaryOperator != null; } + public boolean hasNaryOperator() { + return naryOperator != null; + } + + public String getOperatorName() { + if (hasUnaryOperator()) { + return unaryOperator.name(); + } else if (hasBinaryOperator()) { + return binaryOperator.name(); + } else if (hasNaryOperator()) { + return naryOperator.name(); + } else { + return null; + } + } + @Override public boolean equals(Object o) { if (this == o) { diff --git a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/BQApiTranslator.java b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/BQApiTranslator.java index c959d3991..25b86df73 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/BQApiTranslator.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/BQApiTranslator.java @@ -17,6 +17,7 @@ import bio.terra.tanagra.api.filter.TemporalPrimaryFilter; import bio.terra.tanagra.api.filter.TextSearchFilter; import bio.terra.tanagra.api.filter.TextSearchFilter.TextSearchOperator; +import bio.terra.tanagra.api.shared.*; import bio.terra.tanagra.query.bigquery.translator.field.BQAttributeFieldTranslator; import bio.terra.tanagra.query.bigquery.translator.field.BQCountDistinctFieldTranslator; import bio.terra.tanagra.query.bigquery.translator.field.BQHierarchyIsMemberFieldTranslator; @@ -33,9 +34,12 @@ import bio.terra.tanagra.query.bigquery.translator.filter.BQRelationshipFilterTranslator; import bio.terra.tanagra.query.bigquery.translator.filter.BQTemporalPrimaryFilterTranslator; import bio.terra.tanagra.query.bigquery.translator.filter.BQTextSearchFilterTranslator; +import bio.terra.tanagra.query.sql.*; import bio.terra.tanagra.query.sql.translator.ApiFieldTranslator; import bio.terra.tanagra.query.sql.translator.ApiFilterTranslator; import bio.terra.tanagra.query.sql.translator.ApiTranslator; +import jakarta.annotation.*; +import java.util.*; public final class BQApiTranslator implements ApiTranslator { @Override @@ -118,6 +122,25 @@ public ApiFilterTranslator translator(TemporalPrimaryFilter temporalPrimaryFilte return new BQTemporalPrimaryFilterTranslator(this, temporalPrimaryFilter); } + @Override + public String naryFilterOnRepeatedFieldSql( + SqlField field, + NaryOperator naryOperator, + List values, + @Nullable String tableAlias, + SqlParams sqlParams) { + String functionTemplate = + "EXISTS (SELECT * FROM UNNEST(" + + FUNCTION_TEMPLATE_FIELD_VAR_BRACES + + ") AS flattened WHERE flattened " + + (NaryOperator.IN.equals(naryOperator) ? "IN" : "NOT IN") + + " (" + + FUNCTION_TEMPLATE_VALUES_VAR_BRACES + + "))"; + return functionWithCommaSeparatedArgsFilterSql( + field, functionTemplate, values, tableAlias, sqlParams); + } + @SuppressWarnings("PMD.TooFewBranchesForASwitchStatement") @Override public String textSearchOperatorTemplateSql(TextSearchFilter.TextSearchOperator operator) { diff --git a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/filter/BQAttributeFilterTranslator.java b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/filter/BQAttributeFilterTranslator.java index 033b73e36..2ca11b39f 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/filter/BQAttributeFilterTranslator.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/filter/BQAttributeFilterTranslator.java @@ -1,12 +1,15 @@ package bio.terra.tanagra.query.bigquery.translator.filter; import bio.terra.tanagra.api.filter.AttributeFilter; +import bio.terra.tanagra.api.shared.*; +import bio.terra.tanagra.exception.*; import bio.terra.tanagra.query.sql.SqlField; import bio.terra.tanagra.query.sql.SqlParams; import bio.terra.tanagra.query.sql.translator.ApiFilterTranslator; import bio.terra.tanagra.query.sql.translator.ApiTranslator; import bio.terra.tanagra.underlay.entitymodel.Attribute; import bio.terra.tanagra.underlay.indextable.ITEntityMain; +import jakarta.annotation.*; public class BQAttributeFilterTranslator extends ApiFilterTranslator { private final AttributeFilter attributeFilter; @@ -33,7 +36,31 @@ public String buildSql(SqlParams sqlParams, String tableAlias) { valueField = valueField.cloneWithFunctionWrapper(attribute.getRuntimeSqlFunctionWrapper()); } - if (attributeFilter.hasUnaryOperator()) { + if (attribute.isDataTypeRepeated()) { + boolean naryOperatorIn = + (attributeFilter.hasBinaryOperator() + && BinaryOperator.EQUALS.equals(attributeFilter.getBinaryOperator())) + || (attributeFilter.hasNaryOperator() + && NaryOperator.IN.equals(attributeFilter.getNaryOperator())); + boolean naryOperatorNotIn = + (attributeFilter.hasBinaryOperator() + && BinaryOperator.NOT_EQUALS.equals(attributeFilter.getBinaryOperator())) + || (attributeFilter.hasNaryOperator() + && NaryOperator.NOT_IN.equals(attributeFilter.getNaryOperator())); + if (!naryOperatorIn && !naryOperatorNotIn) { + throw new InvalidQueryException( + "Operator not supported for repeated data type attributes: " + + attributeFilter.getOperatorName() + + ", " + + attribute.getName()); + } + return apiTranslator.naryFilterOnRepeatedFieldSql( + valueField, + naryOperatorIn ? NaryOperator.IN : NaryOperator.NOT_IN, + attributeFilter.getValues(), + tableAlias, + sqlParams); + } else if (attributeFilter.hasUnaryOperator()) { return apiTranslator.unaryFilterSql( valueField, attributeFilter.getUnaryOperator(), tableAlias, sqlParams); } else if (attributeFilter.hasBinaryOperator()) { diff --git a/underlay/src/main/java/bio/terra/tanagra/query/sql/translator/ApiTranslator.java b/underlay/src/main/java/bio/terra/tanagra/query/sql/translator/ApiTranslator.java index 64381398d..26c669139 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/sql/translator/ApiTranslator.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/sql/translator/ApiTranslator.java @@ -149,6 +149,13 @@ default String naryFilterSql( } } + String naryFilterOnRepeatedFieldSql( + SqlField field, + NaryOperator naryOperator, + List values, + @Nullable String tableAlias, + SqlParams sqlParams); + default String functionWithCommaSeparatedArgsFilterSql( SqlField field, String functionTemplate, diff --git a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/filter/BQAttributeFilterTest.java b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/filter/BQAttributeFilterTest.java index 8b58e250e..0e6a6a695 100644 --- a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/filter/BQAttributeFilterTest.java +++ b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/filter/BQAttributeFilterTest.java @@ -4,10 +4,7 @@ import bio.terra.tanagra.api.filter.AttributeFilter; import bio.terra.tanagra.api.query.list.ListQueryRequest; import bio.terra.tanagra.api.query.list.ListQueryResult; -import bio.terra.tanagra.api.shared.BinaryOperator; -import bio.terra.tanagra.api.shared.Literal; -import bio.terra.tanagra.api.shared.NaryOperator; -import bio.terra.tanagra.api.shared.UnaryOperator; +import bio.terra.tanagra.api.shared.*; import bio.terra.tanagra.query.bigquery.BQRunnerTest; import bio.terra.tanagra.query.bigquery.BQTable; import bio.terra.tanagra.underlay.entitymodel.Attribute; @@ -75,4 +72,58 @@ void attributeFilter() throws IOException { assertSqlMatchesWithTableNameOnly( "attributeFilterNaryBetween", listQueryResult.getSql(), table); } + + @Test + void repeatedAttributeFilter() throws IOException { + Entity entity = underlay.getEntity("condition"); + + // We don't have an example of an attribute with a repeated data type, yet. + // So create an artificial attribute just for this test. + Attribute attribute = + new Attribute( + "vocabulary", + DataType.STRING, + true, + false, + false, + "['foo', 'bar', 'baz', ${fieldSql}]", + DataType.STRING, + entity.getAttribute("vocabulary").isComputeDisplayHint(), + entity.getAttribute("vocabulary").isSuppressedForExport(), + entity.getAttribute("vocabulary").isVisitDateForTemporalQuery(), + entity.getAttribute("vocabulary").isVisitIdForTemporalQuery(), + entity.getAttribute("vocabulary").getSourceQuery()); + + // Filter with binary operator NOT_EQUALS. + AttributeFilter attributeFilter = + new AttributeFilter( + underlay, entity, attribute, BinaryOperator.NOT_EQUALS, Literal.forString("SNOMED")); + AttributeField simpleAttribute = + new AttributeField(underlay, entity, entity.getAttribute("name"), false); + ListQueryResult listQueryResult = + bqQueryRunner.run( + ListQueryRequest.dryRunAgainstIndexData( + underlay, entity, List.of(simpleAttribute), attributeFilter, null, null)); + BQTable table = underlay.getIndexSchema().getEntityMain(entity.getName()).getTablePointer(); + assertSqlMatchesWithTableNameOnly( + "repeatedAttributeFilterBinary", listQueryResult.getSql(), table); + + // Filter with n-ary operator IN. + attributeFilter = + new AttributeFilter( + underlay, + entity, + attribute, + NaryOperator.IN, + List.of( + Literal.forString("bar"), + Literal.forString("ICD9CM"), + Literal.forString("SNOMED"))); + listQueryResult = + bqQueryRunner.run( + ListQueryRequest.dryRunAgainstIndexData( + underlay, entity, List.of(simpleAttribute), attributeFilter, null, null)); + assertSqlMatchesWithTableNameOnly( + "repeatedAttributeFilterNaryIn", listQueryResult.getSql(), table); + } } diff --git a/underlay/src/test/resources/sql/BQAttributeFilterTest/repeatedAttributeFilterBinary.sql b/underlay/src/test/resources/sql/BQAttributeFilterTest/repeatedAttributeFilterBinary.sql new file mode 100644 index 000000000..31b9e744c --- /dev/null +++ b/underlay/src/test/resources/sql/BQAttributeFilterTest/repeatedAttributeFilterBinary.sql @@ -0,0 +1,12 @@ + + SELECT + name + FROM + ${ENT_condition} + WHERE + EXISTS (SELECT + * + FROM + UNNEST(['foo', 'bar', 'baz', vocabulary]) AS flattened + WHERE + flattened NOT IN (@val0)) diff --git a/underlay/src/test/resources/sql/BQAttributeFilterTest/repeatedAttributeFilterNaryIn.sql b/underlay/src/test/resources/sql/BQAttributeFilterTest/repeatedAttributeFilterNaryIn.sql new file mode 100644 index 000000000..741e11d17 --- /dev/null +++ b/underlay/src/test/resources/sql/BQAttributeFilterTest/repeatedAttributeFilterNaryIn.sql @@ -0,0 +1,12 @@ + + SELECT + name + FROM + ${ENT_condition} + WHERE + EXISTS (SELECT + * + FROM + UNNEST(['foo', 'bar', 'baz', vocabulary]) AS flattened + WHERE + flattened IN (@val0, @val1, @val2)) From 4e8c32c6d854f1858a1a95abecb2853afe4c2bd2 Mon Sep 17 00:00:00 2001 From: Mariko Medlock Date: Mon, 9 Sep 2024 14:10:01 -0400 Subject: [PATCH 09/15] Only use FLATTENED alias for repeated attribute field. --- .../bigquery/translator/field/BQAttributeFieldTranslator.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/field/BQAttributeFieldTranslator.java b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/field/BQAttributeFieldTranslator.java index 759c513a9..8e0718e94 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/field/BQAttributeFieldTranslator.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/field/BQAttributeFieldTranslator.java @@ -117,7 +117,9 @@ private List buildSqlFields( private String getValueFieldAlias(boolean flattenRepeatedValues) { String alias = indexTable.getAttributeValueField(attributeField.getAttribute().getName()).getColumnName(); - return flattenRepeatedValues ? ("FLATTENED_" + alias) : alias; + return attributeField.getAttribute().isDataTypeRepeated() && flattenRepeatedValues + ? ("FLATTENED_" + alias) + : alias; } private String getDisplayFieldAlias() { From 3e4bce0e8e693de42f836ef64b4e12cce6def497 Mon Sep 17 00:00:00 2001 From: Mariko Medlock Date: Mon, 9 Sep 2024 17:10:51 -0400 Subject: [PATCH 10/15] OpenAPI repeated type in ValueDisplay. Bug in returning hint query results. --- .../app/controller/objmapping/ToApiUtils.java | 14 +++++++++++--- .../src/main/resources/api/service_openapi.yaml | 6 ++++++ .../tanagra/query/bigquery/BQQueryRunner.java | 2 +- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/service/src/main/java/bio/terra/tanagra/app/controller/objmapping/ToApiUtils.java b/service/src/main/java/bio/terra/tanagra/app/controller/objmapping/ToApiUtils.java index bfbbd0efb..cf95e1f52 100644 --- a/service/src/main/java/bio/terra/tanagra/app/controller/objmapping/ToApiUtils.java +++ b/service/src/main/java/bio/terra/tanagra/app/controller/objmapping/ToApiUtils.java @@ -42,10 +42,18 @@ public static ApiAttribute toApiObject(Attribute attribute) { public static ApiValueDisplay toApiObject(ValueDisplay valueDisplay) { ApiValueDisplay apiObject = new ApiValueDisplay(); - if (valueDisplay != null) { - apiObject.value(toApiObject(valueDisplay.getValue())).display(valueDisplay.getDisplay()); + if (valueDisplay == null) { + return apiObject; + } else if (valueDisplay.isRepeatedValue()) { + return apiObject + .isRepeatedValue(true) + .repeatedValue( + valueDisplay.getRepeatedValue().stream().map(ToApiUtils::toApiObject).toList()); + } else { + return apiObject + .value(toApiObject(valueDisplay.getValue())) + .display(valueDisplay.getDisplay()); } - return apiObject; } public static ApiLiteral toApiObject(Literal literal) { diff --git a/service/src/main/resources/api/service_openapi.yaml b/service/src/main/resources/api/service_openapi.yaml index 678d7f52f..62cfce8cf 100644 --- a/service/src/main/resources/api/service_openapi.yaml +++ b/service/src/main/resources/api/service_openapi.yaml @@ -1432,6 +1432,12 @@ components: type: string description: Optional display string nullable: true + repeatedValue: + type: array + items: + $ref: "#/components/schemas/Literal" + isRepeatedValue: + type: boolean Literal: type: object diff --git a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java index 393e190ff..eefe2d18f 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java @@ -282,7 +282,7 @@ public HintQueryResult run(HintQueryRequest hintQueryRequest) { enumValuesForAttr.put(new ValueDisplay(enumVal, enumDisplay), enumCount); enumValues.put(attribute, enumValuesForAttr); } else if (attribute.getRuntimeDataType().equals(DataType.STRING)) { - Literal enumVal = sqlRowResult.get(enumValColName, DataType.STRING); + Literal enumVal = sqlRowResult.get(enumDisplayColName, DataType.STRING); Long enumCount = sqlRowResult.get(enumCountColName, DataType.INT64).getInt64Val(); Map enumValuesForAttr = enumValues.containsKey(attribute) ? enumValues.get(attribute) : new HashMap<>(); From 1ae4e10aa95670fcebfe34b991fd7849d35c08d9 Mon Sep 17 00:00:00 2001 From: Mariko Medlock Date: Mon, 9 Sep 2024 17:14:21 -0400 Subject: [PATCH 11/15] Remove variant config files. --- .../datamapping/aouCT/entity/variant/all.sql | 10 ------- .../aouCT/entity/variant/entity.json | 20 ------------- .../config/indexer/aouSC2023Q3R2_verily.json | 28 ------------------- .../underlay/aouSC2023Q3R2/underlay.json | 4 +-- 4 files changed, 1 insertion(+), 61 deletions(-) delete mode 100644 underlay/src/main/resources/config/datamapping/aouCT/entity/variant/all.sql delete mode 100644 underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json delete mode 100644 underlay/src/main/resources/config/indexer/aouSC2023Q3R2_verily.json diff --git a/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/all.sql b/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/all.sql deleted file mode 100644 index 44729b870..000000000 --- a/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/all.sql +++ /dev/null @@ -1,10 +0,0 @@ -SELECT ROW_NUMBER() OVER (ORDER BY vid) AS row_num, - vid, - gene_symbol, - consequence, - aa_change, - clinvar_classification, - gvs_all_ac, - gvs_all_an, - gvs_all_af -FROM `${omopDataset}.prep_vat` diff --git a/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json b/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json deleted file mode 100644 index bfa47a358..000000000 --- a/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "name": "variant", - "allInstancesSqlFile": "all.sql", - "attributes": [ - { "name": "id", "dataType": "INT64", "valueFieldName": "row_num" }, - { "name": "variant_id", "dataType": "STRING", "valueFieldName": "vid" }, - { "name": "gene", "dataType": "STRING", "valueFieldName": "gene_symbol", "isComputeDisplayHint": true }, - { "name": "consequence", "dataType": "STRING", "isDataTypeRepeated": true, "isComputeDisplayHint": true }, - { "name": "protein_change", "dataType": "STRING", "valueFieldName": "aa_change" }, - { "name": "clinvar_significance", "dataType": "STRING", "isDataTypeRepeated": true, "valueFieldName": "clinvar_classification", "isComputeDisplayHint": true }, - { "name": "allele_count", "dataType": "INT64", "valueFieldName": "gvs_all_ac", "isComputeDisplayHint": true }, - { "name": "allele_number", "dataType": "INT64", "valueFieldName": "gvs_all_an", "isComputeDisplayHint": true }, - { "name": "allele_frequency", "dataType": "DOUBLE", "valueFieldName": "gvs_all_af", "isComputeDisplayHint": true } - ], - "idAttribute": "id", - "textSearch": { - "attributes": [ "gene", "clinvar_significance" ] - }, - "optimizeGroupByAttributes": [ "gender", "race", "age", "ethnicity" ] -} diff --git a/underlay/src/main/resources/config/indexer/aouSC2023Q3R2_verily.json b/underlay/src/main/resources/config/indexer/aouSC2023Q3R2_verily.json deleted file mode 100644 index f7f920116..000000000 --- a/underlay/src/main/resources/config/indexer/aouSC2023Q3R2_verily.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "underlay": "aouSC2023Q3R2", - "bigQuery": { - "sourceData": { - "projectId": "verily-tanagra-dev", - "datasetId": "aou_test_data_SC2023Q3R2", - "sqlSubstitutions": { - "omopDataset": "verily-tanagra-dev.aou_test_data_SC2023Q3R2", - "staticTablesDataset": "verily-tanagra-dev.aou_static_prep" - } - }, - "indexData": { - "projectId": "verily-tanagra-dev", - "datasetId": "aou_test_data_SC2023Q3R2_index_090424", - "tablePrefix": "T" - }, - "queryProjectId": "verily-tanagra-dev", - "dataLocation": "us" - }, - "dataflow": { - "serviceAccountEmail": "backend-default@verily-tanagra-dev.iam.gserviceaccount.com", - "dataflowLocation": "us-central1", - "gcsTempDirectory": "gs://dataflow-staging-us-central1-694046000181/temp/", - "workerMachineType": "n1-standard-4", - "usePublicIps": false, - "vpcSubnetworkName": null - } -} diff --git a/underlay/src/main/resources/config/underlay/aouSC2023Q3R2/underlay.json b/underlay/src/main/resources/config/underlay/aouSC2023Q3R2/underlay.json index c8c9f4ff1..70d3e4111 100644 --- a/underlay/src/main/resources/config/underlay/aouSC2023Q3R2/underlay.json +++ b/underlay/src/main/resources/config/underlay/aouSC2023Q3R2/underlay.json @@ -56,9 +56,7 @@ "aouRT/surveyPersonalAndFamilyHealth", "aouRT/surveyHealthCareAccess", "aouRT/surveySocialDeterminantsOfHealth", - "aouRT/surveyOccurrence", - - "aouCT/variant" + "aouRT/surveyOccurrence" ], "groupItemsEntityGroups": [ "aouRT/brandIngredientConcept", From 8eb84dc339268e46bcf664329eb695c72d77d46b Mon Sep 17 00:00:00 2001 From: Mariko Medlock Date: Mon, 9 Sep 2024 17:17:09 -0400 Subject: [PATCH 12/15] Regenerate docs. --- docs/generated/UNDERLAY_CONFIG.md | 7 +++++++ ui/src/tanagra-underlay/underlayConfig.ts | 1 + 2 files changed, 8 insertions(+) diff --git a/docs/generated/UNDERLAY_CONFIG.md b/docs/generated/UNDERLAY_CONFIG.md index d700d3b88..4ecd86f74 100644 --- a/docs/generated/UNDERLAY_CONFIG.md +++ b/docs/generated/UNDERLAY_CONFIG.md @@ -73,6 +73,13 @@ When set to true, an indexing job will try to compute a display hint for this at *Default value:* `false` +### SZAttribute.isDataTypeRepeated +**optional** boolean + +True if the data type is repeated (e.g. an array of ints). + +*Default value:* `false` + ### SZAttribute.isSuppressedForExport **optional** boolean diff --git a/ui/src/tanagra-underlay/underlayConfig.ts b/ui/src/tanagra-underlay/underlayConfig.ts index a7e110d0f..68bc5edb6 100644 --- a/ui/src/tanagra-underlay/underlayConfig.ts +++ b/ui/src/tanagra-underlay/underlayConfig.ts @@ -4,6 +4,7 @@ export type SZAttribute = { displayHintRangeMax?: number; displayHintRangeMin?: number; isComputeDisplayHint?: boolean; + isDataTypeRepeated?: boolean; isSuppressedForExport?: boolean; name: string; runtimeDataType?: SZDataType; From 66b66335266c2ba4f2b84242f81ca139d66fcf3d Mon Sep 17 00:00:00 2001 From: Mariko Medlock Date: Tue, 10 Sep 2024 10:06:43 -0400 Subject: [PATCH 13/15] Check actual data rows in result parsing hint & count query tests. --- .../BQCountQueryResultsTest.java | 53 +++++++++++++++---- .../resultparsing/BQHintQueryResultsTest.java | 40 ++++++++++++-- .../filter/BQAttributeFilterTest.java | 2 +- ...epeatedAttributeFilterBinaryNotEquals.sql} | 0 4 files changed, 78 insertions(+), 17 deletions(-) rename underlay/src/test/resources/sql/BQAttributeFilterTest/{repeatedAttributeFilterBinary.sql => repeatedAttributeFilterBinaryNotEquals.sql} (100%) diff --git a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQCountQueryResultsTest.java b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQCountQueryResultsTest.java index aec83d463..dc1690923 100644 --- a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQCountQueryResultsTest.java +++ b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQCountQueryResultsTest.java @@ -13,8 +13,7 @@ import bio.terra.tanagra.api.field.HierarchyPathField; import bio.terra.tanagra.api.field.RelatedEntityIdCountField; import bio.terra.tanagra.api.field.ValueDisplayField; -import bio.terra.tanagra.api.query.count.CountQueryRequest; -import bio.terra.tanagra.api.query.count.CountQueryResult; +import bio.terra.tanagra.api.query.count.*; import bio.terra.tanagra.api.query.hint.HintInstance; import bio.terra.tanagra.api.query.hint.HintQueryResult; import bio.terra.tanagra.api.shared.DataType; @@ -24,8 +23,7 @@ import bio.terra.tanagra.query.bigquery.BQRunnerTest; import bio.terra.tanagra.underlay.entitymodel.*; import bio.terra.tanagra.underlay.entitymodel.entitygroup.EntityGroup; -import java.util.List; -import java.util.Map; +import java.util.*; import org.junit.jupiter.api.Test; public class BQCountQueryResultsTest extends BQRunnerTest { @@ -168,22 +166,55 @@ void repeatedAttributeField() { entityLevelHints, false)); - // Make sure we got some results back. - assertFalse(countQueryResult.getCountInstances().isEmpty()); - // Check each of the group by fields. - countQueryResult - .getCountInstances() + countQueryResult.getCountInstances().stream() + .map(countInstance -> countInstance.getEntityFieldValue(repeatedStringAttribute)) .forEach( - countInstance -> { - ValueDisplay vocabulary = countInstance.getEntityFieldValue(repeatedStringAttribute); + vocabulary -> { assertNotNull(vocabulary); assertTrue( vocabulary.getValue().isNull() || DataType.STRING.equals(vocabulary.getValue().getDataType())); + assertFalse(vocabulary.isRepeatedValue()); assertNotNull(vocabulary.getValue().getStringVal()); assertNull(vocabulary.getDisplay()); }); + + // Condition entity should have an enum string-value hint with 4 + 3 values for vocabulary. The + // three fake ones should all have matching counts. + assertEquals(7, countQueryResult.getCountInstances().size()); + Optional fooCount = + countQueryResult.getCountInstances().stream() + .filter( + countInstance -> + countInstance + .getEntityFieldValue(repeatedStringAttribute) + .getValue() + .equals(Literal.forString("foo"))) + .findAny(); + assertTrue(fooCount.isPresent()); + Optional barCount = + countQueryResult.getCountInstances().stream() + .filter( + countInstance -> + countInstance + .getEntityFieldValue(repeatedStringAttribute) + .getValue() + .equals(Literal.forString("bar"))) + .findAny(); + assertTrue(barCount.isPresent()); + Optional bazCount = + countQueryResult.getCountInstances().stream() + .filter( + countInstance -> + countInstance + .getEntityFieldValue(repeatedStringAttribute) + .getValue() + .equals(Literal.forString("baz"))) + .findAny(); + assertTrue(bazCount.isPresent()); + assertTrue(fooCount.get().getCount() == barCount.get().getCount()); + assertTrue(barCount.get().getCount() == bazCount.get().getCount()); } @Test diff --git a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQHintQueryResultsTest.java b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQHintQueryResultsTest.java index 46243cc70..74f98ed15 100644 --- a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQHintQueryResultsTest.java +++ b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQHintQueryResultsTest.java @@ -3,8 +3,7 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; -import bio.terra.tanagra.api.query.hint.HintQueryRequest; -import bio.terra.tanagra.api.query.hint.HintQueryResult; +import bio.terra.tanagra.api.query.hint.*; import bio.terra.tanagra.api.shared.DataType; import bio.terra.tanagra.api.shared.Literal; import bio.terra.tanagra.query.bigquery.BQRunnerTest; @@ -22,11 +21,41 @@ protected String getServiceConfigName() { @Test void entityLevelHints() { - checkEntityLevelHints(underlay.getPrimaryEntity()); - checkEntityLevelHints(underlay.getEntity("brand")); + // Person entity should have range hints for year_of_birth and age (runtime calculated), and an + // enum value-display hint with 3 values for gender. + List hintInstances = checkEntityLevelHints(underlay.getPrimaryEntity()); + assertTrue( + hintInstances.stream() + .anyMatch( + hintInstance -> + hintInstance.getAttribute().getName().equals("year_of_birth") + && hintInstance.isRangeHint())); + assertTrue( + hintInstances.stream() + .anyMatch( + hintInstance -> + hintInstance.getAttribute().getName().equals("age") + && hintInstance.isRangeHint())); + assertTrue( + hintInstances.stream() + .anyMatch( + hintInstance -> + hintInstance.getAttribute().getName().equals("gender") + && hintInstance.isEnumHint() + && hintInstance.getEnumValueCounts().size() == 3)); + + // Brand entity should have an enum string-value hint with 2 values for vocabulary. + hintInstances = checkEntityLevelHints(underlay.getEntity("brand")); + assertTrue( + hintInstances.stream() + .anyMatch( + hintInstance -> + hintInstance.getAttribute().getName().equals("vocabulary") + && hintInstance.isEnumHint() + && hintInstance.getEnumValueCounts().size() == 2)); } - private void checkEntityLevelHints(Entity hintedEntity) { + private List checkEntityLevelHints(Entity hintedEntity) { HintQueryResult hintQueryResult = bqQueryRunner.run(new HintQueryRequest(underlay, hintedEntity, null, null, null, false)); @@ -64,6 +93,7 @@ private void checkEntityLevelHints(Entity hintedEntity) { .equals(enumValue.getValue().getDataType()))); } }); + return hintQueryResult.getHintInstances(); } @Test diff --git a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/filter/BQAttributeFilterTest.java b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/filter/BQAttributeFilterTest.java index 0e6a6a695..70bf325c9 100644 --- a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/filter/BQAttributeFilterTest.java +++ b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/filter/BQAttributeFilterTest.java @@ -106,7 +106,7 @@ void repeatedAttributeFilter() throws IOException { underlay, entity, List.of(simpleAttribute), attributeFilter, null, null)); BQTable table = underlay.getIndexSchema().getEntityMain(entity.getName()).getTablePointer(); assertSqlMatchesWithTableNameOnly( - "repeatedAttributeFilterBinary", listQueryResult.getSql(), table); + "repeatedAttributeFilterBinaryNotEquals", listQueryResult.getSql(), table); // Filter with n-ary operator IN. attributeFilter = diff --git a/underlay/src/test/resources/sql/BQAttributeFilterTest/repeatedAttributeFilterBinary.sql b/underlay/src/test/resources/sql/BQAttributeFilterTest/repeatedAttributeFilterBinaryNotEquals.sql similarity index 100% rename from underlay/src/test/resources/sql/BQAttributeFilterTest/repeatedAttributeFilterBinary.sql rename to underlay/src/test/resources/sql/BQAttributeFilterTest/repeatedAttributeFilterBinaryNotEquals.sql From 3a5b7e9b83b98776ed7ab772dc3aab05e9414367 Mon Sep 17 00:00:00 2001 From: Mariko Medlock Date: Tue, 10 Sep 2024 12:16:45 -0400 Subject: [PATCH 14/15] Fix overlapping conditional in eldh indexing job. --- .../indexing/job/bigquery/WriteEntityLevelDisplayHints.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteEntityLevelDisplayHints.java b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteEntityLevelDisplayHints.java index 588c63f8f..2f810b7c0 100644 --- a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteEntityLevelDisplayHints.java +++ b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteEntityLevelDisplayHints.java @@ -237,7 +237,9 @@ public void run(boolean isDryRun) { private static boolean isEnumHintForValueDisplay(Attribute attribute) { return (attribute.isValueDisplay() && DataType.INT64.equals(attribute.getRuntimeDataType())) - || (attribute.isSimple() && DataType.STRING.equals(attribute.getRuntimeDataType())); + || (attribute.isSimple() + && !attribute.isDataTypeRepeated() + && DataType.STRING.equals(attribute.getRuntimeDataType())); } private static boolean isEnumHintForRepeatedStringValue(Attribute attribute) { From 89bfadba54e1e76e6ebab3e5fb98a9f615183f03 Mon Sep 17 00:00:00 2001 From: Mariko Medlock Date: Thu, 12 Sep 2024 16:33:28 -0400 Subject: [PATCH 15/15] Always populate repeatedValue property of ValueDisplay in OpenAPI. --- .../tanagra/app/controller/objmapping/ToApiUtils.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/service/src/main/java/bio/terra/tanagra/app/controller/objmapping/ToApiUtils.java b/service/src/main/java/bio/terra/tanagra/app/controller/objmapping/ToApiUtils.java index cf95e1f52..eb572056e 100644 --- a/service/src/main/java/bio/terra/tanagra/app/controller/objmapping/ToApiUtils.java +++ b/service/src/main/java/bio/terra/tanagra/app/controller/objmapping/ToApiUtils.java @@ -50,9 +50,12 @@ public static ApiValueDisplay toApiObject(ValueDisplay valueDisplay) { .repeatedValue( valueDisplay.getRepeatedValue().stream().map(ToApiUtils::toApiObject).toList()); } else { + ApiLiteral apiValue = toApiObject(valueDisplay.getValue()); return apiObject - .value(toApiObject(valueDisplay.getValue())) - .display(valueDisplay.getDisplay()); + .value(apiValue) + .display(valueDisplay.getDisplay()) + .isRepeatedValue(false) + .repeatedValue(List.of(apiValue)); } }