diff --git a/docs/generated/UNDERLAY_CONFIG.md b/docs/generated/UNDERLAY_CONFIG.md index 2c48725ae..908f9daae 100644 --- a/docs/generated/UNDERLAY_CONFIG.md +++ b/docs/generated/UNDERLAY_CONFIG.md @@ -73,6 +73,13 @@ When set to true, an indexing job will try to compute a display hint for this at *Default value:* `false` +### SZAttribute.isDataTypeRepeated +**optional** boolean + +True if the data type is repeated (e.g. an array of ints). + +*Default value:* `false` + ### SZAttribute.isSuppressedForExport **optional** boolean diff --git a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/CreateEntityMain.java b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/CreateEntityMain.java index 642d5b167..90bfb1c51 100644 --- a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/CreateEntityMain.java +++ b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/CreateEntityMain.java @@ -47,6 +47,10 @@ public void run(boolean isDryRun) { Field.newBuilder( columnSchema.getColumnName(), BigQueryBeamUtils.fromDataType(columnSchema.getDataType())) + .setMode( + columnSchema.isDataTypeRepeated() + ? Field.Mode.REPEATED + : Field.Mode.NULLABLE) .build()) .collect(Collectors.toList()); diff --git a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/ValidateDataTypes.java b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/ValidateDataTypes.java index 4c96777d4..ebd9642fc 100644 --- a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/ValidateDataTypes.java +++ b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/ValidateDataTypes.java @@ -92,6 +92,18 @@ public void run(boolean isDryRun) { .collect(Collectors.joining(",", "[", "]")), sourceQueryField.getType()); } + + // Check that the schema repeated flags match those of the index table columns. 
+ boolean sourceQueryFieldIsRepeated = sourceQueryField.getMode().equals(Field.Mode.REPEATED); + boolean isRepeatedFlagsMatch = attribute.isDataTypeRepeated() == sourceQueryFieldIsRepeated; + if (!isRepeatedFlagsMatch) { + foundError = true; + LOGGER.info( + "Data type repeated mismatch found for attribute {}: entity declared {}, SQL schema returns {}", + attribute.getName(), + attribute.isDataTypeRepeated(), + sourceQueryField.getMode()); + } } if (foundError) { throw new InvalidConfigException("Data type mismatch found for entity: " + entity.getName()); diff --git a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteEntityLevelDisplayHints.java b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteEntityLevelDisplayHints.java index 92ba8862a..2f810b7c0 100644 --- a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteEntityLevelDisplayHints.java +++ b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteEntityLevelDisplayHints.java @@ -113,23 +113,53 @@ public void run(boolean isDryRun) { attribute.getName(), minMax.getKey(), minMax.getValue()); - } else if (isEnumHint(attribute)) { - List> enumCounts = computeEnumHint(attribute, isDryRun); + } else if (isEnumHintForValueDisplay(attribute)) { + List> enumCounts = + computeEnumHintForValueDisplay(attribute, isDryRun); + enumCounts.forEach( + enumCount -> { + Literal int64Field = + attribute.isValueDisplay() + ? enumCount.getKey().getValue() + : Literal.forInt64(null); + Literal stringField = + attribute.isValueDisplay() + ? 
Literal.forString(enumCount.getKey().getDisplay()) + : enumCount.getKey().getValue(); + + List rowOfLiterals = new ArrayList<>(); + rowOfLiterals.add(Literal.forString(attribute.getName())); + rowOfLiterals.add(Literal.forDouble(null)); + rowOfLiterals.add(Literal.forDouble(null)); + rowOfLiterals.add(int64Field); + rowOfLiterals.add(stringField); + rowOfLiterals.add(Literal.forInt64(enumCount.getValue())); + insertRows.add(rowOfLiterals); + LOGGER.info( + "Enum value-display or simple-string hint: {}, {}, {}, {}", + attribute.getName(), + int64Field, + stringField, + enumCount.getValue()); + }); + } else if (isEnumHintForRepeatedStringValue(attribute)) { + List> enumCounts = + computeEnumHintForRepeatedStringValue(attribute, isDryRun); enumCounts.forEach( enumCount -> { List rowOfLiterals = new ArrayList<>(); rowOfLiterals.add(Literal.forString(attribute.getName())); rowOfLiterals.add(Literal.forDouble(null)); rowOfLiterals.add(Literal.forDouble(null)); - rowOfLiterals.add(enumCount.getKey().getValue()); - rowOfLiterals.add(Literal.forString(enumCount.getKey().getDisplay())); + rowOfLiterals.add(Literal.forInt64(null)); + rowOfLiterals.add(enumCount.getKey()); rowOfLiterals.add(Literal.forInt64(enumCount.getValue())); insertRows.add(rowOfLiterals); LOGGER.info( - "Enum hint: {}, {}, {}, {}", + "Enum repeated-string hint: {}, {}, {}, {}", attribute.getName(), - enumCount.getKey().getValue(), - enumCount.getKey().getDisplay(), + null, + enumCount.getKey(), enumCount.getValue()); }); } else { @@ -205,8 +235,18 @@ public void run(boolean isDryRun) { } } - private static boolean isEnumHint(Attribute attribute) { - return attribute.isValueDisplay() && DataType.INT64.equals(attribute.getRuntimeDataType()); + private static boolean isEnumHintForValueDisplay(Attribute attribute) { + return (attribute.isValueDisplay() && DataType.INT64.equals(attribute.getRuntimeDataType())) + || (attribute.isSimple() + && !attribute.isDataTypeRepeated() + && 
DataType.STRING.equals(attribute.getRuntimeDataType())); + } + + private static boolean isEnumHintForRepeatedStringValue(Attribute attribute) { + // TODO: Support not-repeated string simple attributes & repeated integer attributes. + return attribute.isSimple() + && attribute.isDataTypeRepeated() + && (DataType.STRING.equals(attribute.getDataType())); } private static boolean isRangeHint(Attribute attribute) { @@ -214,7 +254,7 @@ private static boolean isRangeHint(Attribute attribute) { return false; } return switch (attribute.getRuntimeDataType()) { - case DOUBLE, INT64 -> true; + case DOUBLE, INT64 -> !attribute.isDataTypeRepeated(); default -> false; }; } @@ -275,29 +315,36 @@ private Pair computeRangeHint(Attribute attribute, boolean isD } } - private List> computeEnumHint(Attribute attribute, boolean isDryRun) { + private List> computeEnumHintForValueDisplay( + Attribute attribute, boolean isDryRun) { // Build the query. - // SELECT attrVal AS enumVal, attrDisp AS enumDisp, COUNT(*) AS enumCount FROM indextable GROUP - // BY enumVal, enumDisp + // SELECT attrVal AS enumVal[, attrDisp AS enumDisp], COUNT(*) AS enumCount FROM indextable + // GROUP BY enumVal[, enumDisp] SqlField attrValField = indexAttributesTable.getAttributeValueField(attribute.getName()); - SqlField attrDispField = indexAttributesTable.getAttributeDisplayField(attribute.getName()); + SqlField attrDispField = + attribute.isValueDisplay() + ? 
indexAttributesTable.getAttributeDisplayField(attribute.getName()) + : null; final String enumValAlias = "enumVal"; final String enumDispAlias = "enumDisp"; final String enumCountAlias = "enumCount"; String selectEnumCountSql = - "SELECT " - + SqlQueryField.of(attrValField, enumValAlias).renderForSelect() - + ", " - + SqlQueryField.of(attrDispField, enumDispAlias).renderForSelect() - + ", COUNT(*) AS " + "SELECT " + SqlQueryField.of(attrValField, enumValAlias).renderForSelect(); + if (attribute.isValueDisplay()) { + selectEnumCountSql += ", " + SqlQueryField.of(attrDispField, enumDispAlias).renderForSelect(); + } + selectEnumCountSql += + ", COUNT(*) AS " + enumCountAlias + " FROM " + indexAttributesTable.getTablePointer().render() + " GROUP BY " - + SqlQueryField.of(attrValField, enumValAlias).renderForGroupBy(null, true) - + ", " - + SqlQueryField.of(attrDispField, enumDispAlias).renderForGroupBy(null, true); + + SqlQueryField.of(attrValField, enumValAlias).renderForGroupBy(null, true); + if (attribute.isValueDisplay()) { + selectEnumCountSql += + ", " + SqlQueryField.of(attrDispField, enumDispAlias).renderForGroupBy(null, true); + } LOGGER.info("SQL enum count: {}", selectEnumCountSql); // Execute the query. @@ -315,16 +362,26 @@ private List> computeEnumHint(Attribute attribute, bool // Parse the result rows. for (FieldValueList rowResult : tableResult.getValues()) { FieldValue enumValFieldValue = rowResult.get(enumValAlias); - Literal enumVal = - Literal.forInt64(enumValFieldValue.isNull() ? null : enumValFieldValue.getLongValue()); - FieldValue enumDispFieldValue = rowResult.get(enumDispAlias); - String enumDisp = enumDispFieldValue.isNull() ? null : enumDispFieldValue.getStringValue(); + Literal enumVal; + String enumDisp; + if (attribute.isValueDisplay()) { + enumVal = + Literal.forInt64( + enumValFieldValue.isNull() ? 
null : enumValFieldValue.getLongValue()); + FieldValue enumDispFieldValue = rowResult.get(enumDispAlias); + enumDisp = enumDispFieldValue.isNull() ? null : enumDispFieldValue.getStringValue(); + } else { + enumVal = + Literal.forString( + enumValFieldValue.isNull() ? null : enumValFieldValue.getStringValue()); + enumDisp = null; + } FieldValue enumCountFieldValue = rowResult.get(enumCountAlias); long enumCount = enumCountFieldValue.getLongValue(); enumCounts.add(Pair.of(new ValueDisplay(enumVal, enumDisp), enumCount)); if (enumCounts.size() > MAX_ENUM_VALS_FOR_DISPLAY_HINT) { - // if there are more than the max number of values, then skip the display hint + // If there are more than the max number of values, then skip the display hint. LOGGER.info( "Skipping enum values display hint because there are >{} possible values: {}", MAX_ENUM_VALS_FOR_DISPLAY_HINT, @@ -334,17 +391,82 @@ private List> computeEnumHint(Attribute attribute, bool } } - // Check that there is exactly one display per value. - Map valDisplay = new HashMap<>(); - enumCounts.forEach( - enumCount -> { - if (valDisplay.containsKey(enumCount.getKey().getValue())) { - throw new InvalidConfigException( - "Found >1 possible display for the enum value " + enumCount.getKey().getValue()); - } else { - valDisplay.put(enumCount.getKey().getValue(), enumCount.getKey().getDisplay()); - } - }); + if (attribute.isValueDisplay()) { + // Check that there is exactly one display per value. 
+ Map valDisplay = new HashMap<>(); + enumCounts.forEach( + enumCount -> { + if (valDisplay.containsKey(enumCount.getKey().getValue())) { + throw new InvalidConfigException( + "Found >1 possible display for the enum value " + enumCount.getKey().getValue()); + } else { + valDisplay.put(enumCount.getKey().getValue(), enumCount.getKey().getDisplay()); + } + }); + } + return enumCounts; + } + + private List> computeEnumHintForRepeatedStringValue( + Attribute attribute, boolean isDryRun) { + // TODO: Consolidate the logic here with the ValueDisplay enum hint method. + // Build the query. + // SELECT flattenedAttrVal AS enumVal, COUNT(*) AS enumCount FROM indextable + // CROSS JOIN UNNEST(indextable.attrVal) AS flattenedAttrVal + // GROUP BY enumVal + SqlField attrValField = indexAttributesTable.getAttributeValueField(attribute.getName()); + final String enumValAlias = "enumVal"; + final String enumCountAlias = "enumCount"; + final String flattenedAttrValAlias = "flattenedAttrVal"; + SqlField flattenedAttrValField = SqlField.of(flattenedAttrValAlias); + + String selectEnumCountSql = + "SELECT " + + SqlQueryField.of(flattenedAttrValField, enumValAlias).renderForSelect() + + ", COUNT(*) AS " + + enumCountAlias + + " FROM " + + indexAttributesTable.getTablePointer().render() + + " CROSS JOIN UNNEST(" + + SqlQueryField.of(attrValField).renderForSelect() + + ") AS " + + flattenedAttrValAlias + + " GROUP BY " + + SqlQueryField.of(attrValField, enumValAlias).renderForGroupBy(null, true); + LOGGER.info("SQL enum count: {}", selectEnumCountSql); + + // Execute the query. 
+ List> enumCounts = new ArrayList<>(); + if (isDryRun) { + if (getOutputTable().isEmpty()) { + LOGGER.info("Skipping query dry run because output table does not exist yet."); + } else { + googleBigQuery.dryRunQuery(selectEnumCountSql); + } + enumCounts.add(Pair.of(Literal.forString(""), 0L)); + } else { + TableResult tableResult = googleBigQuery.runQueryLongTimeout(selectEnumCountSql); + + // Parse the result rows. + for (FieldValueList rowResult : tableResult.getValues()) { + FieldValue enumValFieldValue = rowResult.get(enumValAlias); + Literal enumVal = + Literal.forString( + enumValFieldValue.isNull() ? null : enumValFieldValue.getStringValue()); + FieldValue enumCountFieldValue = rowResult.get(enumCountAlias); + long enumCount = enumCountFieldValue.getLongValue(); + enumCounts.add(Pair.of(enumVal, enumCount)); + + if (enumCounts.size() > MAX_ENUM_VALS_FOR_DISPLAY_HINT) { + // if there are more than the max number of values, then skip the display hint + LOGGER.info( + "Skipping enum values display hint because there are >{} possible values: {}", + MAX_ENUM_VALS_FOR_DISPLAY_HINT, + attribute.getName()); + return List.of(); + } + } + } return enumCounts; } } diff --git a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteTextSearchField.java b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteTextSearchField.java index ec14e6e6f..53ce03bcb 100644 --- a/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteTextSearchField.java +++ b/indexer/src/main/java/bio/terra/tanagra/indexing/job/bigquery/WriteTextSearchField.java @@ -69,13 +69,22 @@ public void run(boolean isDryRun) { SqlField attributeTextField; if (attribute.isValueDisplay()) { attributeTextField = indexTable.getAttributeDisplayField(attribute.getName()); - } else if (!attribute.getDataType().equals(DataType.STRING)) { - attributeTextField = - indexTable - .getAttributeValueField(attribute.getName()) - .cloneWithFunctionWrapper("CAST(${fieldSql} AS STRING)"); } 
else { + String functionWrapper = null; + if (!attribute.getDataType().equals(DataType.STRING)) { + functionWrapper = "CAST(${fieldSql} AS STRING)"; + } + if (attribute.isDataTypeRepeated()) { + functionWrapper = + "ARRAY_TO_STRING(" + + (functionWrapper == null ? "${fieldSql}" : functionWrapper) + + ", \" \")"; + } + attributeTextField = indexTable.getAttributeValueField(attribute.getName()); + if (functionWrapper != null) { + attributeTextField = attributeTextField.cloneWithFunctionWrapper(functionWrapper); + } } String idTextSql = diff --git a/service/src/main/java/bio/terra/tanagra/app/controller/objmapping/ToApiUtils.java b/service/src/main/java/bio/terra/tanagra/app/controller/objmapping/ToApiUtils.java index bfbbd0efb..eb572056e 100644 --- a/service/src/main/java/bio/terra/tanagra/app/controller/objmapping/ToApiUtils.java +++ b/service/src/main/java/bio/terra/tanagra/app/controller/objmapping/ToApiUtils.java @@ -42,10 +42,21 @@ public static ApiAttribute toApiObject(Attribute attribute) { public static ApiValueDisplay toApiObject(ValueDisplay valueDisplay) { ApiValueDisplay apiObject = new ApiValueDisplay(); - if (valueDisplay != null) { - apiObject.value(toApiObject(valueDisplay.getValue())).display(valueDisplay.getDisplay()); + if (valueDisplay == null) { + return apiObject; + } else if (valueDisplay.isRepeatedValue()) { + return apiObject + .isRepeatedValue(true) + .repeatedValue( + valueDisplay.getRepeatedValue().stream().map(ToApiUtils::toApiObject).toList()); + } else { + ApiLiteral apiValue = toApiObject(valueDisplay.getValue()); + return apiObject + .value(apiValue) + .display(valueDisplay.getDisplay()) + .isRepeatedValue(false) + .repeatedValue(List.of(apiValue)); } - return apiObject; } public static ApiLiteral toApiObject(Literal literal) { diff --git a/service/src/main/resources/api/service_openapi.yaml b/service/src/main/resources/api/service_openapi.yaml index 15d4674b7..b9654cd69 100644 --- a/service/src/main/resources/api/service_openapi.yaml 
+++ b/service/src/main/resources/api/service_openapi.yaml @@ -1432,6 +1432,12 @@ components: type: string description: Optional display string nullable: true + repeatedValue: + type: array + items: + $ref: "#/components/schemas/Literal" + isRepeatedValue: + type: boolean Literal: type: object diff --git a/ui/src/tanagra-underlay/underlayConfig.ts b/ui/src/tanagra-underlay/underlayConfig.ts index 58f688123..df1af9b50 100644 --- a/ui/src/tanagra-underlay/underlayConfig.ts +++ b/ui/src/tanagra-underlay/underlayConfig.ts @@ -4,6 +4,7 @@ export type SZAttribute = { displayHintRangeMax?: number; displayHintRangeMin?: number; isComputeDisplayHint?: boolean; + isDataTypeRepeated?: boolean; isSuppressedForExport?: boolean; name: string; runtimeDataType?: SZDataType; diff --git a/underlay/src/main/java/bio/terra/tanagra/api/filter/AttributeFilter.java b/underlay/src/main/java/bio/terra/tanagra/api/filter/AttributeFilter.java index ec5975772..134b6e08b 100644 --- a/underlay/src/main/java/bio/terra/tanagra/api/filter/AttributeFilter.java +++ b/underlay/src/main/java/bio/terra/tanagra/api/filter/AttributeFilter.java @@ -98,6 +98,22 @@ public boolean hasBinaryOperator() { return binaryOperator != null; } + public boolean hasNaryOperator() { + return naryOperator != null; + } + + public String getOperatorName() { + if (hasUnaryOperator()) { + return unaryOperator.name(); + } else if (hasBinaryOperator()) { + return binaryOperator.name(); + } else if (hasNaryOperator()) { + return naryOperator.name(); + } else { + return null; + } + } + @Override public boolean equals(Object o) { if (this == o) { diff --git a/underlay/src/main/java/bio/terra/tanagra/api/shared/ValueDisplay.java b/underlay/src/main/java/bio/terra/tanagra/api/shared/ValueDisplay.java index 8e99e5b4a..f637ae437 100644 --- a/underlay/src/main/java/bio/terra/tanagra/api/shared/ValueDisplay.java +++ b/underlay/src/main/java/bio/terra/tanagra/api/shared/ValueDisplay.java @@ -1,19 +1,29 @@ package 
bio.terra.tanagra.api.shared; -import java.util.Objects; +import com.google.common.collect.*; +import java.util.*; public class ValueDisplay { private final Literal value; private final String display; + private final ImmutableList repeatedValue; public ValueDisplay(Literal value) { this.value = value; this.display = null; + this.repeatedValue = null; } public ValueDisplay(Literal value, String display) { this.value = value; this.display = display; + this.repeatedValue = null; + } + + public ValueDisplay(List repeatedValue) { + this.value = null; + this.display = null; + this.repeatedValue = ImmutableList.copyOf(repeatedValue); } public Literal getValue() { @@ -24,6 +34,14 @@ public String getDisplay() { return display; } + public List getRepeatedValue() { + return repeatedValue; + } + + public boolean isRepeatedValue() { + return repeatedValue != null; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -33,11 +51,13 @@ public boolean equals(Object o) { return false; } ValueDisplay that = (ValueDisplay) o; - return value.equals(that.value) && Objects.equals(display, that.display); + return Objects.equals(value, that.value) + && Objects.equals(display, that.display) + && Objects.equals(repeatedValue, that.repeatedValue); } @Override public int hashCode() { - return Objects.hash(value, display); + return Objects.hash(value, display, repeatedValue); } } diff --git a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java index 887142c85..eefe2d18f 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java @@ -271,8 +271,7 @@ public HintQueryResult run(HintQueryRequest hintQueryRequest) { .getHintedEntity() .getAttribute( sqlRowResult.get(attributeColName, DataType.STRING).getStringVal()); - if (attribute.isValueDisplay() - || 
attribute.getRuntimeDataType().equals(DataType.STRING)) { + if (attribute.isValueDisplay()) { // This is one (value,count) pair of an enum values hint. Literal enumVal = sqlRowResult.get(enumValColName, DataType.INT64); String enumDisplay = @@ -282,6 +281,13 @@ public HintQueryResult run(HintQueryRequest hintQueryRequest) { enumValues.containsKey(attribute) ? enumValues.get(attribute) : new HashMap<>(); enumValuesForAttr.put(new ValueDisplay(enumVal, enumDisplay), enumCount); enumValues.put(attribute, enumValuesForAttr); + } else if (attribute.getRuntimeDataType().equals(DataType.STRING)) { + Literal enumVal = sqlRowResult.get(enumDisplayColName, DataType.STRING); + Long enumCount = sqlRowResult.get(enumCountColName, DataType.INT64).getInt64Val(); + Map enumValuesForAttr = + enumValues.containsKey(attribute) ? enumValues.get(attribute) : new HashMap<>(); + enumValuesForAttr.put(new ValueDisplay(enumVal), enumCount); + enumValues.put(attribute, enumValuesForAttr); } else { // This is a range hint. 
Double min = sqlRowResult.get(minColName, DataType.DOUBLE).getDoubleVal(); @@ -383,15 +389,21 @@ private SqlQueryRequest buildQuerySqlAgainstIndexData( ITEntityMain entityMain = underlay.getIndexSchema().getEntityMain(singleEntity.getName()); List selectFieldSqls = new ArrayList<>(); + List joinTableSqls = new ArrayList<>(); selectFields.forEach( valueDisplayField -> { List sqlQueryFields; if (groupByFields.contains(valueDisplayField) && valueDisplayField instanceof AttributeField) { + BQAttributeFieldTranslator attributeFieldTranslator = + (BQAttributeFieldTranslator) bqTranslator.translator(valueDisplayField); sqlQueryFields = - ((BQAttributeFieldTranslator) bqTranslator.translator(valueDisplayField)) - .buildSqlFieldsForCountSelectAndGroupBy( - entityLevelHints.get(valueDisplayField.getEntity())); + attributeFieldTranslator.buildSqlFieldsForCountSelectAndGroupBy( + entityLevelHints.get(valueDisplayField.getEntity())); + String joinTableSql = attributeFieldTranslator.buildSqlJoinTableForCountQuery(); + if (joinTableSql != null) { + joinTableSqls.add(joinTableSql); + } } else { sqlQueryFields = bqTranslator.translator(valueDisplayField).buildSqlFieldsForListSelect(); @@ -401,12 +413,17 @@ private SqlQueryRequest buildQuerySqlAgainstIndexData( sqlQueryField -> selectFieldSqls.add(sqlQueryField.renderForSelect())); }); - // SELECT [select fields] FROM [entity main] + // SELECT [select fields] FROM [entity main] JOIN [join tables] sql.append("SELECT ") .append(String.join(", ", selectFieldSqls)) .append(" FROM ") .append(entityMain.getTablePointer().render()); + // JOIN [join tables] + if (!joinTableSqls.isEmpty()) { + sql.append(" ").append(String.join(" ", joinTableSqls)); + } + // WHERE [filter] EntityFilter singleEntityFilter = filters.get(singleEntity); if (singleEntityFilter != null) { diff --git a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQRowResult.java b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQRowResult.java index 
5223d1e23..eb76ea3e8 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQRowResult.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQRowResult.java @@ -2,11 +2,11 @@ import bio.terra.tanagra.api.shared.DataType; import bio.terra.tanagra.api.shared.Literal; -import bio.terra.tanagra.exception.InvalidQueryException; import bio.terra.tanagra.query.sql.SqlRowResult; import com.google.cloud.bigquery.FieldValue; import com.google.cloud.bigquery.FieldValueList; import java.sql.Timestamp; +import java.util.*; public class BQRowResult implements SqlRowResult { private final FieldValueList fieldValues; @@ -18,6 +18,18 @@ public BQRowResult(FieldValueList fieldValues) { @Override public Literal get(String columnName, DataType expectedDataType) { FieldValue fieldValue = fieldValues.get(columnName); + return toLiteral(fieldValue, expectedDataType); + } + + @Override + public List getRepeated(String columnName, DataType expectedDataType) { + List repeatedFieldValue = fieldValues.get(columnName).getRepeatedValue(); + return repeatedFieldValue.stream() + .map(fieldValue -> toLiteral(fieldValue, expectedDataType)) + .toList(); + } + + private static Literal toLiteral(FieldValue fieldValue, DataType expectedDataType) { return switch (expectedDataType) { case STRING -> Literal.forString(fieldValue.isNull() ? null : fieldValue.getStringValue()); case INT64 -> Literal.forInt64(fieldValue.isNull() ? null : fieldValue.getLongValue()); @@ -27,9 +39,6 @@ public Literal get(String columnName, DataType expectedDataType) { case TIMESTAMP -> Literal.forTimestamp( fieldValue.isNull() ? 
null : Timestamp.from(fieldValue.getTimestampInstant())); - default -> - throw new InvalidQueryException( - "Unsupported data type for BigQuery row result: " + expectedDataType); }; } diff --git a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/BQApiTranslator.java b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/BQApiTranslator.java index 30316d368..073ceeee9 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/BQApiTranslator.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/BQApiTranslator.java @@ -9,6 +9,7 @@ import bio.terra.tanagra.api.field.RelatedEntityIdCountField; import bio.terra.tanagra.api.filter.*; import bio.terra.tanagra.api.filter.TextSearchFilter.TextSearchOperator; +import bio.terra.tanagra.api.shared.*; import bio.terra.tanagra.query.bigquery.translator.field.BQAttributeFieldTranslator; import bio.terra.tanagra.query.bigquery.translator.field.BQCountDistinctFieldTranslator; import bio.terra.tanagra.query.bigquery.translator.field.BQHierarchyIsMemberFieldTranslator; @@ -26,9 +27,12 @@ import bio.terra.tanagra.query.bigquery.translator.filter.BQRelationshipFilterTranslator; import bio.terra.tanagra.query.bigquery.translator.filter.BQTemporalPrimaryFilterTranslator; import bio.terra.tanagra.query.bigquery.translator.filter.BQTextSearchFilterTranslator; +import bio.terra.tanagra.query.sql.*; import bio.terra.tanagra.query.sql.translator.ApiFieldTranslator; import bio.terra.tanagra.query.sql.translator.ApiFilterTranslator; import bio.terra.tanagra.query.sql.translator.ApiTranslator; +import jakarta.annotation.*; +import java.util.*; public final class BQApiTranslator implements ApiTranslator { @Override @@ -116,6 +120,25 @@ public ApiFilterTranslator translator(TemporalPrimaryFilter temporalPrimaryFilte return new BQTemporalPrimaryFilterTranslator(this, temporalPrimaryFilter); } + @Override + public String naryFilterOnRepeatedFieldSql( + SqlField field, + 
NaryOperator naryOperator, + List values, + @Nullable String tableAlias, + SqlParams sqlParams) { + String functionTemplate = + "EXISTS (SELECT * FROM UNNEST(" + + FUNCTION_TEMPLATE_FIELD_VAR_BRACES + + ") AS flattened WHERE flattened " + + (NaryOperator.IN.equals(naryOperator) ? "IN" : "NOT IN") + + " (" + + FUNCTION_TEMPLATE_VALUES_VAR_BRACES + + "))"; + return functionWithCommaSeparatedArgsFilterSql( + field, functionTemplate, values, tableAlias, sqlParams); + } + @SuppressWarnings("PMD.TooFewBranchesForASwitchStatement") @Override public String textSearchOperatorTemplateSql(TextSearchFilter.TextSearchOperator operator) { diff --git a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/field/BQAttributeFieldTranslator.java b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/field/BQAttributeFieldTranslator.java index a6727ab7b..8e0718e94 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/field/BQAttributeFieldTranslator.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/field/BQAttributeFieldTranslator.java @@ -12,6 +12,7 @@ import bio.terra.tanagra.query.sql.translator.ApiFieldTranslator; import bio.terra.tanagra.underlay.entitymodel.Attribute; import bio.terra.tanagra.underlay.indextable.ITEntityMain; +import jakarta.annotation.*; import java.util.ArrayList; import java.util.List; import java.util.Optional; @@ -34,7 +35,7 @@ public BQAttributeFieldTranslator(AttributeField attributeField) { @Override public List buildSqlFieldsForListSelect() { - return buildSqlFields(true, true); + return buildSqlFields(true, true, false); } public List buildSqlFieldsForCountSelectAndGroupBy( @@ -42,21 +43,39 @@ public List buildSqlFieldsForCountSelectAndGroupBy( return buildSqlFields( true, entityLevelHints == null - || entityLevelHints.getHintInstance(attributeField.getAttribute()).isEmpty()); + || entityLevelHints.getHintInstance(attributeField.getAttribute()).isEmpty(), + true); + } + + 
public @Nullable String buildSqlJoinTableForCountQuery() { + Attribute attribute = attributeField.getAttribute(); + if (!attribute.isDataTypeRepeated() || attributeField.isAgainstSourceDataset()) { + return null; + } + + SqlField valueField = indexTable.getAttributeValueField(attribute.getName()); + if (attribute.hasRuntimeSqlFunctionWrapper()) { + valueField = valueField.cloneWithFunctionWrapper(attribute.getRuntimeSqlFunctionWrapper()); + } + SqlQueryField valueSqlQueryField = SqlQueryField.of(valueField); + return "CROSS JOIN UNNEST(" + + valueSqlQueryField.renderForSelect() + + ") AS " + + getValueFieldAlias(true); } @Override public List buildSqlFieldsForOrderBy() { - return buildSqlFields(false, true); + return buildSqlFields(false, true, false); } @Override public List buildSqlFieldsForGroupBy() { - return buildSqlFields(true, false); + return buildSqlFields(true, false, true); } private List buildSqlFields( - boolean includeValueField, boolean includeDisplayField) { + boolean includeValueField, boolean includeDisplayField, boolean flattenRepeatedValues) { Attribute attribute = attributeField.getAttribute(); SqlQueryField valueSqlQueryField; @@ -65,12 +84,15 @@ private List buildSqlFields( SqlField valueField = SqlField.of(attribute.getSourceQuery().getValueFieldName()); valueSqlQueryField = SqlQueryField.of(valueField); hasDisplayField = attribute.getSourceQuery().hasDisplayField(); + } else if (attribute.isDataTypeRepeated() && flattenRepeatedValues) { + valueSqlQueryField = SqlQueryField.of(SqlField.of(getValueFieldAlias(true))); + hasDisplayField = false; } else { SqlField valueField = indexTable.getAttributeValueField(attribute.getName()); if (attribute.hasRuntimeSqlFunctionWrapper()) { valueField = valueField.cloneWithFunctionWrapper(attribute.getRuntimeSqlFunctionWrapper()); } - valueSqlQueryField = SqlQueryField.of(valueField, getValueFieldAlias()); + valueSqlQueryField = SqlQueryField.of(valueField, getValueFieldAlias(false)); hasDisplayField = 
!attribute.isSimple(); } if (!hasDisplayField || attributeField.isExcludeDisplay()) { @@ -92,10 +114,12 @@ private List buildSqlFields( return sqlQueryFields; } - private String getValueFieldAlias() { - return indexTable - .getAttributeValueField(attributeField.getAttribute().getName()) - .getColumnName(); + private String getValueFieldAlias(boolean flattenRepeatedValues) { + String alias = + indexTable.getAttributeValueField(attributeField.getAttribute().getName()).getColumnName(); + return attributeField.getAttribute().isDataTypeRepeated() && flattenRepeatedValues + ? ("FLATTENED_" + alias) + : alias; } private String getDisplayFieldAlias() { @@ -106,21 +130,29 @@ private String getDisplayFieldAlias() { @Override public ValueDisplay parseValueDisplayFromResult(SqlRowResult sqlRowResult) { - Literal valueField = - sqlRowResult.get(getValueFieldAlias(), attributeField.getAttribute().getRuntimeDataType()); - - if (attributeField.getAttribute().isSimple() || attributeField.isExcludeDisplay()) { - return new ValueDisplay(valueField); + if (attributeField.getAttribute().isDataTypeRepeated()) { + List repeatedValueField = + sqlRowResult.getRepeated( + getValueFieldAlias(false), attributeField.getAttribute().getRuntimeDataType()); + return new ValueDisplay(repeatedValueField); } else { - Literal displayField = sqlRowResult.get(getDisplayFieldAlias(), DataType.STRING); - return new ValueDisplay(valueField, displayField.getStringVal()); + Literal valueField = + sqlRowResult.get( + getValueFieldAlias(false), attributeField.getAttribute().getRuntimeDataType()); + if (attributeField.getAttribute().isSimple() || attributeField.isExcludeDisplay()) { + return new ValueDisplay(valueField); + } else { + Literal displayField = sqlRowResult.get(getDisplayFieldAlias(), DataType.STRING); + return new ValueDisplay(valueField, displayField.getStringVal()); + } } } public ValueDisplay parseValueDisplayFromCountResult( SqlRowResult sqlRowResult, HintQueryResult entityLevelHints) { Literal 
valueField = - sqlRowResult.get(getValueFieldAlias(), attributeField.getAttribute().getRuntimeDataType()); + sqlRowResult.get( + getValueFieldAlias(true), attributeField.getAttribute().getRuntimeDataType()); if (attributeField.getAttribute().isSimple() || attributeField.isExcludeDisplay()) { if (attributeField.isExcludeDisplay()) { diff --git a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/filter/BQAttributeFilterTranslator.java b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/filter/BQAttributeFilterTranslator.java index 033b73e36..2ca11b39f 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/filter/BQAttributeFilterTranslator.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/translator/filter/BQAttributeFilterTranslator.java @@ -1,12 +1,15 @@ package bio.terra.tanagra.query.bigquery.translator.filter; import bio.terra.tanagra.api.filter.AttributeFilter; +import bio.terra.tanagra.api.shared.*; +import bio.terra.tanagra.exception.*; import bio.terra.tanagra.query.sql.SqlField; import bio.terra.tanagra.query.sql.SqlParams; import bio.terra.tanagra.query.sql.translator.ApiFilterTranslator; import bio.terra.tanagra.query.sql.translator.ApiTranslator; import bio.terra.tanagra.underlay.entitymodel.Attribute; import bio.terra.tanagra.underlay.indextable.ITEntityMain; +import jakarta.annotation.*; public class BQAttributeFilterTranslator extends ApiFilterTranslator { private final AttributeFilter attributeFilter; @@ -33,7 +36,31 @@ public String buildSql(SqlParams sqlParams, String tableAlias) { valueField = valueField.cloneWithFunctionWrapper(attribute.getRuntimeSqlFunctionWrapper()); } - if (attributeFilter.hasUnaryOperator()) { + if (attribute.isDataTypeRepeated()) { + boolean naryOperatorIn = + (attributeFilter.hasBinaryOperator() + && BinaryOperator.EQUALS.equals(attributeFilter.getBinaryOperator())) + || (attributeFilter.hasNaryOperator() + && 
NaryOperator.IN.equals(attributeFilter.getNaryOperator())); + boolean naryOperatorNotIn = + (attributeFilter.hasBinaryOperator() + && BinaryOperator.NOT_EQUALS.equals(attributeFilter.getBinaryOperator())) + || (attributeFilter.hasNaryOperator() + && NaryOperator.NOT_IN.equals(attributeFilter.getNaryOperator())); + if (!naryOperatorIn && !naryOperatorNotIn) { + throw new InvalidQueryException( + "Operator not supported for repeated data type attributes: " + + attributeFilter.getOperatorName() + + ", " + + attribute.getName()); + } + return apiTranslator.naryFilterOnRepeatedFieldSql( + valueField, + naryOperatorIn ? NaryOperator.IN : NaryOperator.NOT_IN, + attributeFilter.getValues(), + tableAlias, + sqlParams); + } else if (attributeFilter.hasUnaryOperator()) { return apiTranslator.unaryFilterSql( valueField, attributeFilter.getUnaryOperator(), tableAlias, sqlParams); } else if (attributeFilter.hasBinaryOperator()) { diff --git a/underlay/src/main/java/bio/terra/tanagra/query/sql/SqlRowResult.java b/underlay/src/main/java/bio/terra/tanagra/query/sql/SqlRowResult.java index 35dd392b2..a9c124e6f 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/sql/SqlRowResult.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/sql/SqlRowResult.java @@ -2,11 +2,15 @@ import bio.terra.tanagra.api.shared.DataType; import bio.terra.tanagra.api.shared.Literal; +import java.util.*; public interface SqlRowResult { /** Get literal value for the column in this row. */ Literal get(String columnName, DataType expectedDataType); + /** Get literal values for the repeated column in this row. */ + List getRepeated(String columnName, DataType expectedDataType); + /** Return the number of {@link bio.terra.tanagra.api.shared.Literal}s in this row. 
*/ int size(); } diff --git a/underlay/src/main/java/bio/terra/tanagra/query/sql/translator/ApiTranslator.java b/underlay/src/main/java/bio/terra/tanagra/query/sql/translator/ApiTranslator.java index 699937d76..7a0503bd0 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/sql/translator/ApiTranslator.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/sql/translator/ApiTranslator.java @@ -135,6 +135,13 @@ default String naryFilterSql( } } + String naryFilterOnRepeatedFieldSql( + SqlField field, + NaryOperator naryOperator, + List values, + @Nullable String tableAlias, + SqlParams sqlParams); + default String functionWithCommaSeparatedArgsFilterSql( SqlField field, String functionTemplate, diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/ColumnSchema.java b/underlay/src/main/java/bio/terra/tanagra/underlay/ColumnSchema.java index 350cb9e22..c950fea02 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/ColumnSchema.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/ColumnSchema.java @@ -7,15 +7,22 @@ public class ColumnSchema implements Serializable { private final String columnName; private final DataType dataType; + private final boolean isDataTypeRepeated; private final boolean isRequired; public ColumnSchema(String columnName, DataType dataType) { - this(columnName, dataType, false); + this(columnName, dataType, false, false); } public ColumnSchema(String columnName, DataType dataType, boolean isRequired) { + this(columnName, dataType, false, isRequired); + } + + public ColumnSchema( + String columnName, DataType dataType, boolean isDataTypeRepeated, boolean isRequired) { this.columnName = columnName; this.dataType = dataType; + this.isDataTypeRepeated = isDataTypeRepeated; this.isRequired = isRequired; } @@ -27,6 +34,10 @@ public DataType getDataType() { return dataType; } + public boolean isDataTypeRepeated() { + return isDataTypeRepeated; + } + public boolean isRequired() { return isRequired; } @@ -42,11 +53,12 
@@ public boolean equals(Object o) { ColumnSchema that = (ColumnSchema) o; return isRequired == that.isRequired && columnName.equals(that.columnName) - && dataType == that.dataType; + && dataType == that.dataType + && isDataTypeRepeated == that.isDataTypeRepeated; } @Override public int hashCode() { - return Objects.hash(columnName, dataType, isRequired); + return Objects.hash(columnName, dataType, isDataTypeRepeated, isRequired); } } diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/Underlay.java b/underlay/src/main/java/bio/terra/tanagra/underlay/Underlay.java index 9c0212ef1..2d14ea65f 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/Underlay.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/Underlay.java @@ -372,6 +372,7 @@ public static Entity fromConfigEntity(SZEntity szEntity, String primaryEntityNam return new Attribute( szAttribute.name, ConfigReader.deserializeDataType(szAttribute.dataType), + szAttribute.isDataTypeRepeated, szAttribute.displayFieldName != null, szAttribute.name.equals(szEntity.idAttribute), szAttribute.runtimeSqlFunctionWrapper, diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/entitymodel/Attribute.java b/underlay/src/main/java/bio/terra/tanagra/underlay/entitymodel/Attribute.java index e5cf9ec15..f698fe80a 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/entitymodel/Attribute.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/entitymodel/Attribute.java @@ -6,6 +6,7 @@ public final class Attribute { private final String name; private final DataType dataType; + private final boolean isDataTypeRepeated; private final boolean isValueDisplay; private final boolean isId; private final String runtimeSqlFunctionWrapper; @@ -20,6 +21,7 @@ public final class Attribute { public Attribute( String name, DataType dataType, + boolean isDataTypeRepeated, boolean isValueDisplay, boolean isId, String runtimeSqlFunctionWrapper, @@ -31,6 +33,7 @@ public Attribute( SourceQuery 
sourceQuery) { this.name = name; this.dataType = dataType; + this.isDataTypeRepeated = isDataTypeRepeated; this.isValueDisplay = isValueDisplay; this.isId = isId; this.runtimeSqlFunctionWrapper = runtimeSqlFunctionWrapper; @@ -50,6 +53,10 @@ public DataType getDataType() { return dataType; } + public boolean isDataTypeRepeated() { + return isDataTypeRepeated; + } + public boolean isSimple() { return !isValueDisplay; } @@ -111,6 +118,7 @@ public boolean equals(Object o) { && isVisitIdForTemporalQuery == attribute.isVisitIdForTemporalQuery && name.equals(attribute.name) && dataType == attribute.dataType + && isDataTypeRepeated == attribute.isDataTypeRepeated && Objects.equals(runtimeSqlFunctionWrapper, attribute.runtimeSqlFunctionWrapper) && runtimeDataType == attribute.runtimeDataType && Objects.equals(sourceQuery, attribute.sourceQuery); @@ -121,6 +129,7 @@ public int hashCode() { return Objects.hash( name, dataType, + isDataTypeRepeated, isValueDisplay, isId, runtimeSqlFunctionWrapper, diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/indextable/ITEntityMain.java b/underlay/src/main/java/bio/terra/tanagra/underlay/indextable/ITEntityMain.java index 9572f4e9a..bb4edef91 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/indextable/ITEntityMain.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/indextable/ITEntityMain.java @@ -38,7 +38,10 @@ public ITEntityMain( szAttribute -> { columnSchemasBuilder.add( new ColumnSchema( - szAttribute.name, ConfigReader.deserializeDataType(szAttribute.dataType))); + szAttribute.name, + ConfigReader.deserializeDataType(szAttribute.dataType), + szAttribute.isDataTypeRepeated, + false)); if (szAttribute.displayFieldName != null) { columnSchemasBuilder.add( new ColumnSchema( @@ -107,7 +110,8 @@ public SqlField getAttributeValueField(String attribute) { } public ColumnSchema getAttributeValueColumnSchema(Attribute attribute) { - return new ColumnSchema(attribute.getName(), attribute.getDataType()); + 
return new ColumnSchema( + attribute.getName(), attribute.getDataType(), attribute.isDataTypeRepeated(), false); } public SqlField getAttributeDisplayField(String attribute) { diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZAttribute.java b/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZAttribute.java index 19aa404fa..54748d802 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZAttribute.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZAttribute.java @@ -23,6 +23,13 @@ public class SZAttribute { @AnnotatedField(name = "SZAttribute.dataType", markdown = "Data type of the attribute.") public SZDataType dataType; + @AnnotatedField( + name = "SZAttribute.isDataTypeRepeated", + markdown = "True if the data type is repeated (e.g. an array of ints).", + optional = true, + defaultValue = "false") + public boolean isDataTypeRepeated; + @AnnotatedField( name = "SZAttribute.valueFieldName", markdown = diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/sourcetable/STEntityAttributes.java b/underlay/src/main/java/bio/terra/tanagra/underlay/sourcetable/STEntityAttributes.java index 0a0635e74..5ea61742c 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/sourcetable/STEntityAttributes.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/sourcetable/STEntityAttributes.java @@ -32,7 +32,9 @@ public STEntityAttributes(BQTable bqTable, String entity, List szAt szAttribute.valueFieldName == null ? 
szAttribute.name : szAttribute.valueFieldName, - ConfigReader.deserializeDataType(szAttribute.dataType))); + ConfigReader.deserializeDataType(szAttribute.dataType), + szAttribute.isDataTypeRepeated, + false)); if (szAttribute.displayFieldName != null) { attributeDisplayColumnSchemasBuilder.put( szAttribute.name, new ColumnSchema(szAttribute.displayFieldName, DataType.STRING)); diff --git a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQCountQueryResultsTest.java b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQCountQueryResultsTest.java index 286eaf8ae..dc1690923 100644 --- a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQCountQueryResultsTest.java +++ b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQCountQueryResultsTest.java @@ -13,8 +13,7 @@ import bio.terra.tanagra.api.field.HierarchyPathField; import bio.terra.tanagra.api.field.RelatedEntityIdCountField; import bio.terra.tanagra.api.field.ValueDisplayField; -import bio.terra.tanagra.api.query.count.CountQueryRequest; -import bio.terra.tanagra.api.query.count.CountQueryResult; +import bio.terra.tanagra.api.query.count.*; import bio.terra.tanagra.api.query.hint.HintInstance; import bio.terra.tanagra.api.query.hint.HintQueryResult; import bio.terra.tanagra.api.shared.DataType; @@ -22,11 +21,9 @@ import bio.terra.tanagra.api.shared.OrderByDirection; import bio.terra.tanagra.api.shared.ValueDisplay; import bio.terra.tanagra.query.bigquery.BQRunnerTest; -import bio.terra.tanagra.underlay.entitymodel.Entity; -import bio.terra.tanagra.underlay.entitymodel.Hierarchy; +import bio.terra.tanagra.underlay.entitymodel.*; import bio.terra.tanagra.underlay.entitymodel.entitygroup.EntityGroup; -import java.util.List; -import java.util.Map; +import java.util.*; import org.junit.jupiter.api.Test; public class BQCountQueryResultsTest extends BQRunnerTest { @@ -115,6 +112,111 @@ void attributeField() { }); } + @Test + void 
repeatedAttributeField() { + Entity entity = underlay.getEntity("condition"); + + // We don't have an example of an attribute with a repeated data type, yet. + // So create an artificial attribute just for this test. + AttributeField repeatedStringAttribute = + new AttributeField( + underlay, + entity, + new Attribute( + "vocabulary", + DataType.STRING, + true, + false, + false, + "['foo', 'bar', 'baz', ${fieldSql}]", + DataType.STRING, + entity.getAttribute("vocabulary").isComputeDisplayHint(), + entity.getAttribute("vocabulary").isSuppressedForExport(), + entity.getAttribute("vocabulary").isVisitDateForTemporalQuery(), + entity.getAttribute("vocabulary").isVisitIdForTemporalQuery(), + entity.getAttribute("vocabulary").getSourceQuery()), + false); + + List groupBys = List.of(repeatedStringAttribute); + HintQueryResult entityLevelHints = + new HintQueryResult( + "", + List.of( + new HintInstance( + entity.getAttribute("vocabulary"), + Map.of( + new ValueDisplay(Literal.forString("foo")), + 25L, + new ValueDisplay(Literal.forString("bar")), + 140L, + new ValueDisplay(Literal.forString("baz")), + 85L)))); + CountQueryResult countQueryResult = + bqQueryRunner.run( + new CountQueryRequest( + underlay, + entity, + null, + groupBys, + null, + OrderByDirection.DESCENDING, + null, + null, + null, + entityLevelHints, + false)); + + // Check each of the group by fields. + countQueryResult.getCountInstances().stream() + .map(countInstance -> countInstance.getEntityFieldValue(repeatedStringAttribute)) + .forEach( + vocabulary -> { + assertNotNull(vocabulary); + assertTrue( + vocabulary.getValue().isNull() + || DataType.STRING.equals(vocabulary.getValue().getDataType())); + assertFalse(vocabulary.isRepeatedValue()); + assertNotNull(vocabulary.getValue().getStringVal()); + assertNull(vocabulary.getDisplay()); + }); + + // Condition entity should have an enum string-value hint with 4 + 3 values for vocabulary. The + // three fake ones should all have matching counts. 
+ assertEquals(7, countQueryResult.getCountInstances().size()); + Optional fooCount = + countQueryResult.getCountInstances().stream() + .filter( + countInstance -> + countInstance + .getEntityFieldValue(repeatedStringAttribute) + .getValue() + .equals(Literal.forString("foo"))) + .findAny(); + assertTrue(fooCount.isPresent()); + Optional barCount = + countQueryResult.getCountInstances().stream() + .filter( + countInstance -> + countInstance + .getEntityFieldValue(repeatedStringAttribute) + .getValue() + .equals(Literal.forString("bar"))) + .findAny(); + assertTrue(barCount.isPresent()); + Optional bazCount = + countQueryResult.getCountInstances().stream() + .filter( + countInstance -> + countInstance + .getEntityFieldValue(repeatedStringAttribute) + .getValue() + .equals(Literal.forString("baz"))) + .findAny(); + assertTrue(bazCount.isPresent()); + assertTrue(fooCount.get().getCount() == barCount.get().getCount()); + assertTrue(barCount.get().getCount() == bazCount.get().getCount()); + } + @Test void hierarchyFields() { Entity entity = underlay.getEntity("condition"); diff --git a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQHintQueryResultsTest.java b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQHintQueryResultsTest.java index b557f9e2a..74f98ed15 100644 --- a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQHintQueryResultsTest.java +++ b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQHintQueryResultsTest.java @@ -3,8 +3,7 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; -import bio.terra.tanagra.api.query.hint.HintQueryRequest; -import bio.terra.tanagra.api.query.hint.HintQueryResult; +import bio.terra.tanagra.api.query.hint.*; import bio.terra.tanagra.api.shared.DataType; import bio.terra.tanagra.api.shared.Literal; import bio.terra.tanagra.query.bigquery.BQRunnerTest; @@ -21,8 +20,42 @@ 
protected String getServiceConfigName() { } @Test - void entityLevelHint() { - Entity hintedEntity = underlay.getPrimaryEntity(); + void entityLevelHints() { + // Person entity should have range hints for year_of_birth and age (runtime calculated), and an + // enum value-display hint with 3 values for gender. + List hintInstances = checkEntityLevelHints(underlay.getPrimaryEntity()); + assertTrue( + hintInstances.stream() + .anyMatch( + hintInstance -> + hintInstance.getAttribute().getName().equals("year_of_birth") + && hintInstance.isRangeHint())); + assertTrue( + hintInstances.stream() + .anyMatch( + hintInstance -> + hintInstance.getAttribute().getName().equals("age") + && hintInstance.isRangeHint())); + assertTrue( + hintInstances.stream() + .anyMatch( + hintInstance -> + hintInstance.getAttribute().getName().equals("gender") + && hintInstance.isEnumHint() + && hintInstance.getEnumValueCounts().size() == 3)); + + // Brand entity should have an enum string-value hint with 2 values for vocabulary. 
+ hintInstances = checkEntityLevelHints(underlay.getEntity("brand")); + assertTrue( + hintInstances.stream() + .anyMatch( + hintInstance -> + hintInstance.getAttribute().getName().equals("vocabulary") + && hintInstance.isEnumHint() + && hintInstance.getEnumValueCounts().size() == 2)); + } + + private List checkEntityLevelHints(Entity hintedEntity) { HintQueryResult hintQueryResult = bqQueryRunner.run(new HintQueryRequest(underlay, hintedEntity, null, null, null, false)); @@ -43,8 +76,10 @@ void entityLevelHint() { assertTrue(hintInstance.getMin() <= hintInstance.getMax()); } else { // isEnumHint assertTrue( - attribute.isValueDisplay() - || attribute.getRuntimeDataType().equals(DataType.STRING)); + (attribute.isValueDisplay() + && DataType.INT64.equals(attribute.getRuntimeDataType())) + || (attribute.isSimple() + && DataType.STRING.equals(attribute.getRuntimeDataType()))); assertFalse(hintInstance.getEnumValueCounts().isEmpty()); hintInstance .getEnumValueCounts() @@ -58,6 +93,7 @@ void entityLevelHint() { .equals(enumValue.getValue().getDataType()))); } }); + return hintQueryResult.getHintInstances(); } @Test diff --git a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQListQueryResultsTest.java b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQListQueryResultsTest.java index a49b528c3..7d49fe244 100644 --- a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQListQueryResultsTest.java +++ b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/resultparsing/BQListQueryResultsTest.java @@ -20,8 +20,7 @@ import bio.terra.tanagra.api.shared.OrderByDirection; import bio.terra.tanagra.api.shared.ValueDisplay; import bio.terra.tanagra.query.bigquery.BQRunnerTest; -import bio.terra.tanagra.underlay.entitymodel.Entity; -import bio.terra.tanagra.underlay.entitymodel.Hierarchy; +import bio.terra.tanagra.underlay.entitymodel.*; import bio.terra.tanagra.underlay.entitymodel.entitygroup.EntityGroup; import 
java.util.List; import org.junit.jupiter.api.Test; @@ -41,12 +40,34 @@ void attributeField() { AttributeField idAttribute = new AttributeField(underlay, entity, entity.getIdAttribute(), false); + // We don't have an example of an attribute with a repeated data type, yet. + // So create an artificial attribute just for this test. + AttributeField repeatedStringAttribute = + new AttributeField( + underlay, + entity, + new Attribute( + "person_source_value", + DataType.STRING, + true, + false, + false, + "['foo', 'bar', 'baz', ${fieldSql}]", + DataType.STRING, + entity.getIdAttribute().isComputeDisplayHint(), + entity.getIdAttribute().isSuppressedForExport(), + entity.getIdAttribute().isVisitDateForTemporalQuery(), + entity.getIdAttribute().isVisitIdForTemporalQuery(), + entity.getIdAttribute().getSourceQuery()), + false); + List selectAttributes = List.of( simpleAttribute, valueDisplayAttribute, valueDisplayAttributeWithoutDisplay, - runtimeCalculatedAttribute); + runtimeCalculatedAttribute, + repeatedStringAttribute); List orderBys = List.of(new OrderBy(idAttribute, OrderByDirection.DESCENDING)); int limit = 5; ListQueryResult listQueryResult = @@ -92,6 +113,13 @@ void attributeField() { age.getValue().isNull() || DataType.INT64.equals(age.getValue().getDataType())); assertNotNull(age.getValue().getInt64Val()); assertNull(age.getDisplay()); + + ValueDisplay repeatedString = + listInstance.getEntityFieldValue(repeatedStringAttribute); + assertNotNull(repeatedString); + assertTrue(repeatedString.isRepeatedValue()); + assertNotNull(repeatedString.getRepeatedValue()); + assertEquals(4, repeatedString.getRepeatedValue().size()); }); } diff --git a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/BQCountQueryTest.java b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/BQCountQueryTest.java index 2c30166ab..88ed46ece 100644 --- a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/BQCountQueryTest.java +++ 
b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/BQCountQueryTest.java @@ -6,10 +6,7 @@ import bio.terra.tanagra.api.query.count.CountQueryResult; import bio.terra.tanagra.api.query.hint.HintInstance; import bio.terra.tanagra.api.query.hint.HintQueryResult; -import bio.terra.tanagra.api.shared.BinaryOperator; -import bio.terra.tanagra.api.shared.Literal; -import bio.terra.tanagra.api.shared.OrderByDirection; -import bio.terra.tanagra.api.shared.ValueDisplay; +import bio.terra.tanagra.api.shared.*; import bio.terra.tanagra.query.bigquery.BQRunnerTest; import bio.terra.tanagra.query.bigquery.BQTable; import bio.terra.tanagra.underlay.entitymodel.Attribute; @@ -156,6 +153,61 @@ void groupByValueDisplayAttribute() throws IOException { "groupByValueDisplayField", countQueryResult.getSql(), entityMainTable); } + @Test + void groupByRepeatedAttribute() throws IOException { + Entity entity = underlay.getEntity("condition"); + + // We don't have an example of an attribute with a repeated data type, yet. + // So create an artificial attribute just for this test. 
+ Attribute groupByAttribute = + new Attribute( + "vocabulary", + DataType.STRING, + true, + false, + false, + "['foo', 'bar', 'baz', ${fieldSql}]", + DataType.STRING, + entity.getAttribute("vocabulary").isComputeDisplayHint(), + entity.getAttribute("vocabulary").isSuppressedForExport(), + entity.getAttribute("vocabulary").isVisitDateForTemporalQuery(), + entity.getAttribute("vocabulary").isVisitIdForTemporalQuery(), + entity.getAttribute("vocabulary").getSourceQuery()); + AttributeField groupByAttributeField = + new AttributeField(underlay, entity, groupByAttribute, false); + HintQueryResult hintQueryResult = + new HintQueryResult( + "", + List.of( + new HintInstance( + groupByAttribute, + Map.of( + new ValueDisplay(Literal.forString("foo")), + 25L, + new ValueDisplay(Literal.forString("bar")), + 140L, + new ValueDisplay(Literal.forString("baz")), + 85L)))); + CountQueryResult countQueryResult = + bqQueryRunner.run( + new CountQueryRequest( + underlay, + entity, + null, + List.of(groupByAttributeField), + null, + OrderByDirection.DESCENDING, + null, + null, + null, + hintQueryResult, + true)); + BQTable entityMainTable = + underlay.getIndexSchema().getEntityMain(entity.getName()).getTablePointer(); + assertSqlMatchesWithTableNameOnly( + "groupByRepeatedAttribute", countQueryResult.getSql(), entityMainTable); + } + @Test void countDistinctAttributeNotId() throws IOException { Entity entity = underlay.getEntity("conditionOccurrence"); diff --git a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/BQFieldTest.java b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/BQFieldTest.java index fc3b0462a..12ba89b06 100644 --- a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/BQFieldTest.java +++ b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/BQFieldTest.java @@ -88,6 +88,7 @@ void attributeFieldAgainstSourceData() throws IOException { new Attribute( "ethnicityNoDisplayJoin", 
ethnicityAttribute.getDataType(), + ethnicityAttribute.isDataTypeRepeated(), ethnicityAttribute.isValueDisplay(), ethnicityAttribute.isId(), ethnicityAttribute.getRuntimeSqlFunctionWrapper(), @@ -110,6 +111,7 @@ void attributeFieldAgainstSourceData() throws IOException { new Attribute( "genderSuppressed", genderAttribute.getDataType(), + genderAttribute.isDataTypeRepeated(), genderAttribute.isValueDisplay(), genderAttribute.isId(), genderAttribute.getRuntimeSqlFunctionWrapper(), diff --git a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/filter/BQAttributeFilterTest.java b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/filter/BQAttributeFilterTest.java index 8b58e250e..70bf325c9 100644 --- a/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/filter/BQAttributeFilterTest.java +++ b/underlay/src/test/java/bio/terra/tanagra/query/bigquery/sqlbuilding/filter/BQAttributeFilterTest.java @@ -4,10 +4,7 @@ import bio.terra.tanagra.api.filter.AttributeFilter; import bio.terra.tanagra.api.query.list.ListQueryRequest; import bio.terra.tanagra.api.query.list.ListQueryResult; -import bio.terra.tanagra.api.shared.BinaryOperator; -import bio.terra.tanagra.api.shared.Literal; -import bio.terra.tanagra.api.shared.NaryOperator; -import bio.terra.tanagra.api.shared.UnaryOperator; +import bio.terra.tanagra.api.shared.*; import bio.terra.tanagra.query.bigquery.BQRunnerTest; import bio.terra.tanagra.query.bigquery.BQTable; import bio.terra.tanagra.underlay.entitymodel.Attribute; @@ -75,4 +72,58 @@ void attributeFilter() throws IOException { assertSqlMatchesWithTableNameOnly( "attributeFilterNaryBetween", listQueryResult.getSql(), table); } + + @Test + void repeatedAttributeFilter() throws IOException { + Entity entity = underlay.getEntity("condition"); + + // We don't yet have an example of an attribute with a repeated data type. + // So create an artificial one here; its SQL wrapper puts the field into an array literal. 
+ Attribute attribute = + new Attribute( + "vocabulary", + DataType.STRING, + true, + false, + false, + "['foo', 'bar', 'baz', ${fieldSql}]", + DataType.STRING, + entity.getAttribute("vocabulary").isComputeDisplayHint(), + entity.getAttribute("vocabulary").isSuppressedForExport(), + entity.getAttribute("vocabulary").isVisitDateForTemporalQuery(), + entity.getAttribute("vocabulary").isVisitIdForTemporalQuery(), + entity.getAttribute("vocabulary").getSourceQuery()); + + // Filter with binary NOT_EQUALS. NOTE(review): SQL matches if ANY element differs; confirm. + AttributeFilter attributeFilter = + new AttributeFilter( + underlay, entity, attribute, BinaryOperator.NOT_EQUALS, Literal.forString("SNOMED")); + AttributeField simpleAttribute = + new AttributeField(underlay, entity, entity.getAttribute("name"), false); + ListQueryResult listQueryResult = + bqQueryRunner.run( + ListQueryRequest.dryRunAgainstIndexData( + underlay, entity, List.of(simpleAttribute), attributeFilter, null, null)); + BQTable table = underlay.getIndexSchema().getEntityMain(entity.getName()).getTablePointer(); + assertSqlMatchesWithTableNameOnly( + "repeatedAttributeFilterBinaryNotEquals", listQueryResult.getSql(), table); + + // Filter with n-ary operator IN. 
+ attributeFilter = + new AttributeFilter( + underlay, + entity, + attribute, + NaryOperator.IN, + List.of( + Literal.forString("bar"), + Literal.forString("ICD9CM"), + Literal.forString("SNOMED"))); + listQueryResult = + bqQueryRunner.run( + ListQueryRequest.dryRunAgainstIndexData( + underlay, entity, List.of(simpleAttribute), attributeFilter, null, null)); + assertSqlMatchesWithTableNameOnly( + "repeatedAttributeFilterNaryIn", listQueryResult.getSql(), table); + } } diff --git a/underlay/src/test/resources/sql/BQAttributeFilterTest/repeatedAttributeFilterBinaryNotEquals.sql b/underlay/src/test/resources/sql/BQAttributeFilterTest/repeatedAttributeFilterBinaryNotEquals.sql new file mode 100644 index 000000000..31b9e744c --- /dev/null +++ b/underlay/src/test/resources/sql/BQAttributeFilterTest/repeatedAttributeFilterBinaryNotEquals.sql @@ -0,0 +1,12 @@ + + SELECT + name + FROM + ${ENT_condition} + WHERE + EXISTS (SELECT + * + FROM + UNNEST(['foo', 'bar', 'baz', vocabulary]) AS flattened + WHERE + flattened NOT IN (@val0)) diff --git a/underlay/src/test/resources/sql/BQAttributeFilterTest/repeatedAttributeFilterNaryIn.sql b/underlay/src/test/resources/sql/BQAttributeFilterTest/repeatedAttributeFilterNaryIn.sql new file mode 100644 index 000000000..741e11d17 --- /dev/null +++ b/underlay/src/test/resources/sql/BQAttributeFilterTest/repeatedAttributeFilterNaryIn.sql @@ -0,0 +1,12 @@ + + SELECT + name + FROM + ${ENT_condition} + WHERE + EXISTS (SELECT + * + FROM + UNNEST(['foo', 'bar', 'baz', vocabulary]) AS flattened + WHERE + flattened IN (@val0, @val1, @val2)) diff --git a/underlay/src/test/resources/sql/BQCountQueryTest/groupByRepeatedAttribute.sql b/underlay/src/test/resources/sql/BQCountQueryTest/groupByRepeatedAttribute.sql new file mode 100644 index 000000000..b28a02293 --- /dev/null +++ b/underlay/src/test/resources/sql/BQCountQueryTest/groupByRepeatedAttribute.sql @@ -0,0 +1,12 @@ + + SELECT + COUNT(id) AS T_CTDT, + FLATTENED_vocabulary + FROM + 
${ENT_condition} + CROSS JOIN + UNNEST(['foo', 'bar', 'baz', vocabulary]) AS FLATTENED_vocabulary + GROUP BY + FLATTENED_vocabulary + ORDER BY + T_CTDT DESC