diff --git a/annotationProcessor/src/main/java/bio/terra/tanagra/annotation/UnderlayConfigPath.java b/annotationProcessor/src/main/java/bio/terra/tanagra/annotation/UnderlayConfigPath.java index c7c3f209b..74bf30697 100644 --- a/annotationProcessor/src/main/java/bio/terra/tanagra/annotation/UnderlayConfigPath.java +++ b/annotationProcessor/src/main/java/bio/terra/tanagra/annotation/UnderlayConfigPath.java @@ -1,20 +1,6 @@ package bio.terra.tanagra.annotation; -import bio.terra.tanagra.underlay.serialization.SZAttribute; -import bio.terra.tanagra.underlay.serialization.SZBigQuery; -import bio.terra.tanagra.underlay.serialization.SZCorePlugin; -import bio.terra.tanagra.underlay.serialization.SZCriteriaOccurrence; -import bio.terra.tanagra.underlay.serialization.SZCriteriaSelector; -import bio.terra.tanagra.underlay.serialization.SZDataType; -import bio.terra.tanagra.underlay.serialization.SZEntity; -import bio.terra.tanagra.underlay.serialization.SZGroupItems; -import bio.terra.tanagra.underlay.serialization.SZHierarchy; -import bio.terra.tanagra.underlay.serialization.SZIndexer; -import bio.terra.tanagra.underlay.serialization.SZPrepackagedCriteria; -import bio.terra.tanagra.underlay.serialization.SZService; -import bio.terra.tanagra.underlay.serialization.SZTextSearch; -import bio.terra.tanagra.underlay.serialization.SZUnderlay; -import bio.terra.tanagra.underlay.serialization.SZVisualization; +import bio.terra.tanagra.underlay.serialization.*; import java.util.List; @SuppressWarnings("PMD.CouplingBetweenObjects") @@ -52,6 +38,7 @@ public class UnderlayConfigPath extends AnnotationPath { SZCriteriaSelector.Display.class, SZCriteriaSelector.Modifier.class, SZPrepackagedCriteria.class, + SZRollupCountsSql.class, SZVisualization.class, SZCorePlugin.class); diff --git a/docs/generated/UNDERLAY_CONFIG.md b/docs/generated/UNDERLAY_CONFIG.md index 9f20df40b..9a4058c05 100644 --- a/docs/generated/UNDERLAY_CONFIG.md +++ b/docs/generated/UNDERLAY_CONFIG.md @@ -23,6 +23,7 @@ This documentation is generated from annotations in the configuration classes. * [SZPrepackagedCriteria](#szprepackagedcriteria) * [SZPrimaryCriteriaRelationship](#szprimarycriteriarelationship) * [SZPrimaryRelationship](#szprimaryrelationship) +* [SZRollupCountsSql](#szrollupcountssql) * [SZService](#szservice) * [SZSourceData](#szsourcedata) * [SZSourceQuery](#szsourcequery) @@ -642,7 +643,7 @@ Required if the [id pairs SQL](#szgroupitemsidpairssqlfile) is defined. Name of the group entity - items entity id pairs SQL file. -If this property is set, then the [id pairs SQL](#szgroupitemsidpairssqlfile) must be unset. File must be in the same directory as the entity group file. Name includes file extension. +File must be in the same directory as the entity group file. Name includes file extension. There can be other columns selected in the SQL file (e.g. `SELECT * FROM relationships`), but the group and items entity ids are required. If this property is set, then the [foreign key atttribute](#szgroupitemsforeignkeyattributeitemsentity) must be unset. @@ -669,6 +670,11 @@ Name of the entity group. This is the unique identifier for the entity group. In a single underlay, the entity group names of any group type cannot overlap. Name may not include spaces or special characters, only letters and numbers. The first character must be a letter. +### SZGroupItems.rollupCountsSql +**optional** [SZRollupCountsSql](#szrollupcountssql) + +Pointer to SQL that returns entity id - rollup count (= number of related entity instances) pairs. + ### SZGroupItems.useSourceIdPairsSql **optional** boolean @@ -967,6 +973,36 @@ Name of the field or column name that maps to the primary entity id. Required if +## SZRollupCountsSql +Pointer to SQL that returns entity id - rollup count (= number of related entity instances) pairs (e.g. variant - number of people). Useful when there's an easy way to calculate these in SQL and we want to avoid ingesting the full entity - related entity relationship id pairs table into Dataflow. + +### SZRollupCountsSql.entityIdFieldName +**required** String + +Name of the field or column name that maps to the entity id. + +*Example value:* `entity_id` + +### SZRollupCountsSql.rollupCountFieldName +**required** String + +Name of the field or column name that maps to the rollup count per entity id. + +*Example value:* `rollup_count` + +### SZRollupCountsSql.rollupCountsSqlFile +**required** String + +Name of the entity id - rollup counts (= number of items entity instances) pairs SQL file. + +File must be in the same directory as the entity/group file. Name includes file extension. + +There can be other columns selected in the SQL file (e.g. `SELECT * FROM relationships`), but the entity id and rollup count fields are required. + +*Example value:* `rollupCounts.sql` + + + ## SZService Service configuration. diff --git a/indexer/src/main/java/bio/terra/tanagra/indexing/JobSequencer.java b/indexer/src/main/java/bio/terra/tanagra/indexing/JobSequencer.java index 6a607e13e..ecd329b45 100644 --- a/indexer/src/main/java/bio/terra/tanagra/indexing/JobSequencer.java +++ b/indexer/src/main/java/bio/terra/tanagra/indexing/JobSequencer.java @@ -31,11 +31,7 @@ import bio.terra.tanagra.underlay.indextable.ITHierarchyChildParent; import bio.terra.tanagra.underlay.indextable.ITRelationshipIdPairs; import bio.terra.tanagra.underlay.serialization.SZIndexer; -import bio.terra.tanagra.underlay.sourcetable.STEntityAttributes; -import bio.terra.tanagra.underlay.sourcetable.STHierarchyChildParent; -import bio.terra.tanagra.underlay.sourcetable.STHierarchyRootFilter; -import bio.terra.tanagra.underlay.sourcetable.STRelationshipIdPairs; -import bio.terra.tanagra.underlay.sourcetable.STTextSearchTerms; +import bio.terra.tanagra.underlay.sourcetable.*; import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; @@ -192,6 +188,14 @@ public static SequencedJobSet getJobSetForGroupItems( groupItems.getGroupEntity().getName(), groupItems.getItemsEntity().getName()) : null; + STRelationshipRollupCounts groupItemsRelationshipRollupCountsTable = + underlay + .getSourceSchema() + .getRelationshipRollupCounts( + groupItems.getName(), + groupItems.getGroupEntity().getName(), + groupItems.getItemsEntity().getName()) + .orElse(null); jobSet.addJob( new WriteRollupCounts( indexerConfig, @@ -203,7 +207,8 @@ public static SequencedJobSet getJobSetForGroupItems( itemsEntityIndexTable, groupItemsIdPairsTable, null, - null)); + null, + groupItemsRelationshipRollupCountsTable)); // If the criteria entity has hierarchies, then also compute the criteria rollup counts for each // hierarchy. @@ -228,7 +233,8 @@ public static SequencedJobSet getJobSetForGroupItems( underlay .getIndexSchema() .getHierarchyAncestorDescendant( - groupItems.getGroupEntity().getName(), hierarchy.getName())))); + groupItems.getGroupEntity().getName(), hierarchy.getName()), + null))); } if (groupItems.getGroupEntity().hasHierarchies()) { @@ -378,6 +384,7 @@ public static SequencedJobSet getJobSetForCriteriaOccurrence( primaryEntityIndexTable, primaryCriteriaIdPairsTable, null, + null, null)); // If the criteria entity has hierarchies, then also compute the criteria rollup counts for each @@ -404,7 +411,8 @@ public static SequencedJobSet getJobSetForCriteriaOccurrence( .getIndexSchema() .getHierarchyAncestorDescendant( criteriaOccurrence.getCriteriaEntity().getName(), - hierarchy.getName())))); + hierarchy.getName()), + null))); } // Compute instance-level display hints for the occurrence entity attributes that are flagged as diff --git a/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/WriteRollupCounts.java b/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/WriteRollupCounts.java index 136851088..1bf0ed4c9 100644 --- a/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/WriteRollupCounts.java +++ b/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/WriteRollupCounts.java @@ -17,6 +17,8 @@ import bio.terra.tanagra.underlay.indextable.ITHierarchyAncestorDescendant; import bio.terra.tanagra.underlay.indextable.ITRelationshipIdPairs; import bio.terra.tanagra.underlay.serialization.SZIndexer; +import bio.terra.tanagra.underlay.sourcetable.*; +import com.google.api.*; import com.google.api.services.bigquery.model.TableFieldSchema; import com.google.api.services.bigquery.model.TableRow; import com.google.api.services.bigquery.model.TableSchema; @@ -78,6 +80,7 @@ public class WriteRollupCounts extends BigQueryJob { private final @Nullable ITRelationshipIdPairs relationshipIdPairsIndexTable; private final @Nullable Hierarchy hierarchy; private final @Nullable ITHierarchyAncestorDescendant ancestorDescendantTable; + private final @Nullable STRelationshipRollupCounts relationshipRollupCountsSourceTable; @SuppressWarnings({"checkstyle:ParameterNumber", "PMD.ExcessiveParameterList"}) public WriteRollupCounts( @@ -90,7 +93,8 @@ public WriteRollupCounts( ITEntityMain countedEntityIndexTable, @Nullable ITRelationshipIdPairs relationshipIdPairsIndexTable, @Nullable Hierarchy hierarchy, - @Nullable ITHierarchyAncestorDescendant ancestorDescendantTable) { + @Nullable ITHierarchyAncestorDescendant ancestorDescendantTable, + @Nullable STRelationshipRollupCounts relationshipRollupCountsSourceTable) { super(indexerConfig); this.entityGroup = entityGroup; this.entity = entity; @@ -101,6 +105,7 @@ public WriteRollupCounts( this.relationshipIdPairsIndexTable = relationshipIdPairsIndexTable; this.hierarchy = hierarchy; this.ancestorDescendantTable = ancestorDescendantTable; + this.relationshipRollupCountsSourceTable = relationshipRollupCountsSourceTable; } @Override @@ -136,16 +141,20 @@ && outputTableHasAtLeastOneRowWithNotNullField( @Override public void run(boolean isDryRun) { - // Only run the Dataflow job if the temp table hasn't been written yet. - Optional tempTable = - googleBigQuery.getTable( - indexerConfig.bigQuery.indexData.projectId, - indexerConfig.bigQuery.indexData.datasetId, - getTempTableName()); - if (tempTable.isEmpty()) { - writeFieldsToTempTable(isDryRun); + if (relationshipRollupCountsSourceTable == null) { + // Only run the Dataflow job if the temp table hasn't been written yet. + Optional
tempTable = + googleBigQuery.getTable( + indexerConfig.bigQuery.indexData.projectId, + indexerConfig.bigQuery.indexData.datasetId, + getTempTableName()); + if (tempTable.isEmpty()) { + writeFieldsToTempTable(isDryRun); + } else { + LOGGER.info("Temp table has already been written. Skipping Dataflow job."); + } } else { - LOGGER.info("Temp table has already been written. Skipping Dataflow job."); + LOGGER.info("Rollup counts source SQL is defined. Skipping Dataflow job."); } // Dataflow jobs can only write new rows to BigQuery, so in this second step, copy over the @@ -353,21 +362,30 @@ private void copyFieldsToEntityTable(boolean isDryRun) { indexTable.getEntityGroupCountField( entityGroup.getName(), hierarchy == null ? null : hierarchy.getName()); - BQTable tempBQTable = - new BQTable( - indexerConfig.bigQuery.indexData.projectId, - indexerConfig.bigQuery.indexData.datasetId, - getTempTableName()); - SqlField tempTableIdField = SqlField.of(entityTableIdField.getColumnName()); - SqlField tempTableCountField = SqlField.of(entityTableCountField.getColumnName()); - String tempTableSql = + BQTable rollupCountsBQTable; + SqlField rollupCountsTableIdField; + SqlField rollupCountsTableCountField; + if (relationshipRollupCountsSourceTable == null) { + rollupCountsBQTable = + new BQTable( + indexerConfig.bigQuery.indexData.projectId, + indexerConfig.bigQuery.indexData.datasetId, + getTempTableName()); + rollupCountsTableIdField = SqlField.of(entityTableIdField.getColumnName()); + rollupCountsTableCountField = SqlField.of(entityTableCountField.getColumnName()); + } else { + rollupCountsBQTable = relationshipRollupCountsSourceTable.getTablePointer(); + rollupCountsTableIdField = relationshipRollupCountsSourceTable.getEntityIdField(); + rollupCountsTableCountField = relationshipRollupCountsSourceTable.getCountField(); + } + String rollupCountsTableSql = "SELECT " - + SqlQueryField.of(tempTableIdField).renderForSelect() + + SqlQueryField.of(rollupCountsTableIdField).renderForSelect() + ", " - + SqlQueryField.of(tempTableCountField).renderForSelect() + + SqlQueryField.of(rollupCountsTableCountField).renderForSelect() + " FROM " - + tempBQTable.render(); - LOGGER.info("temp table query: {}", tempTableSql); + + rollupCountsBQTable.render(); + LOGGER.info("rollup counts table query: {}", rollupCountsTableSql); // Build an update-from-select query for the index entity main table and the // id-count query. @@ -381,19 +399,19 @@ private void copyFieldsToEntityTable(boolean isDryRun) { + " SET " + SqlQueryField.of(entityTableCountField).renderForSelect(updateTableAlias) + " = " - + SqlQueryField.of(tempTableCountField).renderForSelect(tempTableAlias) + + SqlQueryField.of(rollupCountsTableCountField).renderForSelect(tempTableAlias) + " FROM (SELECT " - + SqlQueryField.of(tempTableCountField).renderForSelect() + + SqlQueryField.of(rollupCountsTableCountField).renderForSelect() + ", " - + SqlQueryField.of(tempTableIdField).renderForSelect() + + SqlQueryField.of(rollupCountsTableIdField).renderForSelect() + " FROM " - + tempBQTable.render() + + rollupCountsBQTable.render() + ") AS " + tempTableAlias + " WHERE " + SqlQueryField.of(entityTableIdField).renderForSelect(updateTableAlias) + " = " - + SqlQueryField.of(tempTableIdField).renderForSelect(tempTableAlias); + + SqlQueryField.of(rollupCountsTableIdField).renderForSelect(tempTableAlias); LOGGER.info("update-from-select query: {}", updateFromSelectSql); // Run the update-from-select to write the count field in the index entity main table. diff --git a/ui/src/tanagra-underlay/underlayConfig.ts b/ui/src/tanagra-underlay/underlayConfig.ts index 6e5f58f62..5a7a1ccc2 100644 --- a/ui/src/tanagra-underlay/underlayConfig.ts +++ b/ui/src/tanagra-underlay/underlayConfig.ts @@ -114,6 +114,7 @@ export type SZGroupItems = { itemsEntity: string; itemsEntityIdFieldName?: string; name: string; + rollupCountsSql?: SZRollupCountsSql; useSourceIdPairsSql?: boolean; }; @@ -176,6 +177,12 @@ export type SZPrimaryRelationship = { primaryEntityIdFieldName?: string; }; +export type SZRollupCountsSql = { + entityIdFieldName: string; + rollupCountFieldName: string; + sqlFile: string; +}; + export type SZService = { bigQuery: SZBigQuery; underlay: string; diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/SourceSchema.java b/underlay/src/main/java/bio/terra/tanagra/underlay/SourceSchema.java index 731dfba04..0d698e637 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/SourceSchema.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/SourceSchema.java @@ -6,15 +6,10 @@ import bio.terra.tanagra.underlay.serialization.SZEntity; import bio.terra.tanagra.underlay.serialization.SZGroupItems; import bio.terra.tanagra.underlay.serialization.SZUnderlay; -import bio.terra.tanagra.underlay.sourcetable.STEntityAttributes; -import bio.terra.tanagra.underlay.sourcetable.STHierarchyChildParent; -import bio.terra.tanagra.underlay.sourcetable.STHierarchyRootFilter; -import bio.terra.tanagra.underlay.sourcetable.STRelationshipIdPairs; -import bio.terra.tanagra.underlay.sourcetable.STTextSearchTerms; +import bio.terra.tanagra.underlay.sourcetable.*; import com.google.common.collect.ImmutableList; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; -import java.util.ArrayList; -import java.util.List; +import java.util.*; @SuppressFBWarnings( value = "NP_UNWRITTEN_PUBLIC_OR_PROTECTED_FIELD", @@ -26,18 +21,21 @@ public final class SourceSchema { private final ImmutableList hierarchyChildParentTables; private final ImmutableList hierarchyRootFilterTables; private final ImmutableList relationshipIdPairTables; + private final ImmutableList relationshipRollupCountTables; private SourceSchema( List entityAttributesTables, List textSearchTermsTables, List hierarchyChildParentTables, List hierarchyRootFilterTables, - List relationshipIdPairTables) { + List relationshipIdPairTables, + List relationshipRollupCountTables) { this.entityAttributesTables = ImmutableList.copyOf(entityAttributesTables); this.textSearchTermsTables = ImmutableList.copyOf(textSearchTermsTables); this.hierarchyChildParentTables = ImmutableList.copyOf(hierarchyChildParentTables); this.hierarchyRootFilterTables = ImmutableList.copyOf(hierarchyRootFilterTables); this.relationshipIdPairTables = ImmutableList.copyOf(relationshipIdPairTables); + this.relationshipRollupCountTables = ImmutableList.copyOf(relationshipRollupCountTables); } public STEntityAttributes getEntityAttributes(String entity) { @@ -99,6 +97,17 @@ public STRelationshipIdPairs getRelationshipIdPairs( .orElseThrow(); } + public Optional getRelationshipRollupCounts( + String entityGroup, String entity, String countedEntity) { + return relationshipRollupCountTables.stream() + .filter( + relationshipRollupCounts -> + relationshipRollupCounts.getEntityGroup().equals(entityGroup) + && relationshipRollupCounts.getEntity().equals(entity) + && relationshipRollupCounts.getCountedEntity().equals(countedEntity)) + .findFirst(); + } + public static SourceSchema fromConfig( SZBigQuery szBigQuery, SZUnderlay szUnderlay, ConfigReader configReader) { List entityAttributesTables = new ArrayList<>(); @@ -106,6 +115,7 @@ public static SourceSchema fromConfig( List hierarchyChildParentTables = new ArrayList<>(); List hierarchyRootFilterTables = new ArrayList<>(); List relationshipIdPairTables = new ArrayList<>(); + List relationshipRollupCountTables = new ArrayList<>(); // Build source tables for each entity. szUnderlay.entities.forEach( @@ -121,7 +131,11 @@ public static SourceSchema fromConfig( // Build source tables for each entity group. szUnderlay.groupItemsEntityGroups.forEach( groupItemsPath -> - fromConfigGroupItems(groupItemsPath, configReader, relationshipIdPairTables)); + fromConfigGroupItems( + groupItemsPath, + configReader, + relationshipIdPairTables, + relationshipRollupCountTables)); szUnderlay.criteriaOccurrenceEntityGroups.forEach( criteriaOccurrencePath -> fromConfigCriteriaOccurrence( @@ -134,7 +148,8 @@ public static SourceSchema fromConfig( textSearchTermsTables, hierarchyChildParentTables, hierarchyRootFilterTables, - relationshipIdPairTables); + relationshipIdPairTables, + relationshipRollupCountTables); } private static void fromConfigEntity( @@ -184,7 +199,8 @@ private static void fromConfigEntity( private static void fromConfigGroupItems( String groupItemsPath, ConfigReader configReader, - List relationshipIdPairTables) { + List relationshipIdPairTables, + List relationshipRollupCountTables) { SZGroupItems szGroupItems = configReader.readGroupItems(groupItemsPath); if (szGroupItems.idPairsSqlFile != null) { // RelationshipIdPairs table. @@ -200,6 +216,20 @@ private static void fromConfigGroupItems( szGroupItems.groupEntityIdFieldName, szGroupItems.itemsEntityIdFieldName)); } + if (szGroupItems.rollupCountsSql != null) { + // RelationshipRollupCounts table. + String rollupCountsSql = + configReader.readEntityGroupSql(groupItemsPath, szGroupItems.rollupCountsSql.sqlFile); + BQTable rollupCountsTable = new BQTable(rollupCountsSql); + relationshipRollupCountTables.add( + new STRelationshipRollupCounts( + rollupCountsTable, + szGroupItems.name, + szGroupItems.groupEntity, + szGroupItems.itemsEntity, + szGroupItems.rollupCountsSql.entityIdFieldName, + szGroupItems.rollupCountsSql.rollupCountFieldName)); + } } private static void fromConfigCriteriaOccurrence( diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZGroupItems.java b/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZGroupItems.java index ae996fcda..a1c01c647 100644 --- a/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZGroupItems.java +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZGroupItems.java @@ -38,7 +38,6 @@ public class SZGroupItems { name = "SZGroupItems.idPairsSqlFile", markdown = "Name of the group entity - items entity id pairs SQL file.\n\n" - + "If this property is set, then the [id pairs SQL](${SZGroupItems.idPairsSqlFile}) must be unset. " + "File must be in the same directory as the entity group file. Name includes file extension.\n\n" + "There can be other columns selected in the SQL file (e.g. `SELECT * FROM relationships`), but the " + "group and items entity ids are required. If this property is set, then the " @@ -73,4 +72,11 @@ public class SZGroupItems { optional = true, exampleValue = "items_id") public String itemsEntityIdFieldName; + + @AnnotatedField( + name = "SZGroupItems.rollupCountsSql", + markdown = + "Pointer to SQL that returns entity id - rollup count (= number of related entity instances) pairs.", + optional = true) + public SZRollupCountsSql rollupCountsSql; } diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZRollupCountsSql.java b/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZRollupCountsSql.java new file mode 100644 index 000000000..7bccd23ed --- /dev/null +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZRollupCountsSql.java @@ -0,0 +1,33 @@ +package bio.terra.tanagra.underlay.serialization; + +import bio.terra.tanagra.annotation.*; + +@AnnotatedClass( + name = "SZRollupCountsSql", + markdown = + "Pointer to SQL that returns entity id - rollup count (= number of related entity instances) pairs " + + "(e.g. variant - number of people). Useful when there's an easy way to calculate these in SQL " + + "and we want to avoid ingesting the full entity - related entity relationship id pairs table into Dataflow.") +public class SZRollupCountsSql { + @AnnotatedField( + name = "SZRollupCountsSql.rollupCountsSqlFile", + markdown = + "Name of the entity id - rollup counts (= number of items entity instances) pairs SQL file.\n\n" + + "File must be in the same directory as the entity/group file. Name includes file extension.\n\n" + + "There can be other columns selected in the SQL file (e.g. `SELECT * FROM relationships`), but the " + + "entity id and rollup count fields are required.", + exampleValue = "rollupCounts.sql") + public String sqlFile; + + @AnnotatedField( + name = "SZRollupCountsSql.entityIdFieldName", + markdown = "Name of the field or column name that maps to the entity id.", + exampleValue = "entity_id") + public String entityIdFieldName; + + @AnnotatedField( + name = "SZRollupCountsSql.rollupCountFieldName", + markdown = "Name of the field or column name that maps to the rollup count per entity id.", + exampleValue = "rollup_count") + public String rollupCountFieldName; +} diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/sourcetable/STRelationshipRollupCounts.java b/underlay/src/main/java/bio/terra/tanagra/underlay/sourcetable/STRelationshipRollupCounts.java new file mode 100644 index 000000000..2d3c2150a --- /dev/null +++ b/underlay/src/main/java/bio/terra/tanagra/underlay/sourcetable/STRelationshipRollupCounts.java @@ -0,0 +1,55 @@ +package bio.terra.tanagra.underlay.sourcetable; + +import bio.terra.tanagra.api.shared.DataType; +import bio.terra.tanagra.query.bigquery.BQTable; +import bio.terra.tanagra.query.sql.SqlField; +import bio.terra.tanagra.underlay.ColumnSchema; +import com.google.common.collect.ImmutableList; + +public class STRelationshipRollupCounts extends SourceTable { + private final String entityGroup; + private final String entity; + private final String countedEntity; + private final ColumnSchema entityIdColumnSchema; + private final ColumnSchema countColumnSchema; + + public STRelationshipRollupCounts( + BQTable bqTable, + String entityGroup, + String entity, + String countedEntity, + String entityIdFieldName, + String countFieldName) { + super(bqTable); + this.entityGroup = entityGroup; + this.entity = entity; + this.countedEntity = countedEntity; + this.entityIdColumnSchema = new ColumnSchema(entityIdFieldName, DataType.INT64); + this.countColumnSchema = new ColumnSchema(countFieldName, DataType.INT64); + } + + @Override + public ImmutableList getColumnSchemas() { + return ImmutableList.of(entityIdColumnSchema, countColumnSchema); + } + + public String getEntityGroup() { + return entityGroup; + } + + public String getEntity() { + return entity; + } + + public String getCountedEntity() { + return countedEntity; + } + + public SqlField getEntityIdField() { + return SqlField.of(entityIdColumnSchema.getColumnName()); + } + + public SqlField getCountField() { + return SqlField.of(countColumnSchema.getColumnName()); + } +} diff --git a/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/entityGroup.json b/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/entityGroup.json index de5cdf420..752cc988f 100644 --- a/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/entityGroup.json +++ b/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/entityGroup.json @@ -5,5 +5,10 @@ "idPairsSqlFile": "idPairs.sql", "useSourceIdPairsSql": true, "groupEntityIdFieldName": "variant_row_num", - "itemsEntityIdFieldName": "flattened_person_id" + "itemsEntityIdFieldName": "flattened_person_id", + "rollupCountsSql": { + "sqlFile": "rollupCounts.sql", + "entityIdFieldName": "variant_row_num", + "rollupCountFieldName": "num_persons" + } } diff --git a/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/idPairs.sql b/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/idPairs.sql index 418cfc020..665a86eca 100644 --- a/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/idPairs.sql +++ b/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/idPairs.sql @@ -1,5 +1,6 @@ SELECT v.variant_row_num, flattened_person_id -FROM `${omopDataset}.variant_to_person` AS vtop +/* Wrap variant_to_person table in a SELECT DISTINCT because there is a duplicate row in the test data. */ +FROM (SELECT DISTINCT vid, person_ids FROM `${omopDataset}.variant_to_person`) AS vtop JOIN (SELECT ROW_NUMBER() OVER (ORDER BY vid) AS variant_row_num, vid FROM `${omopDataset}.prep_vat`) AS v ON v.vid = vtop.vid diff --git a/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/rollupCounts.sql b/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/rollupCounts.sql new file mode 100644 index 000000000..64d2f0ae3 --- /dev/null +++ b/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/rollupCounts.sql @@ -0,0 +1,6 @@ +SELECT v.variant_row_num, ARRAY_LENGTH(vtop.person_ids) AS num_persons +/* Wrap variant_to_person table in a SELECT DISTINCT because there is a duplicate row in the test data. */ +FROM (SELECT DISTINCT vid, person_ids FROM `${omopDataset}.variant_to_person`) AS vtop +JOIN + (SELECT ROW_NUMBER() OVER (ORDER BY vid) AS variant_row_num, vid FROM `${omopDataset}.prep_vat`) + AS v ON v.vid = vtop.vid