diff --git a/annotationProcessor/src/main/java/bio/terra/tanagra/annotation/UnderlayConfigPath.java b/annotationProcessor/src/main/java/bio/terra/tanagra/annotation/UnderlayConfigPath.java
index c7c3f209b..74bf30697 100644
--- a/annotationProcessor/src/main/java/bio/terra/tanagra/annotation/UnderlayConfigPath.java
+++ b/annotationProcessor/src/main/java/bio/terra/tanagra/annotation/UnderlayConfigPath.java
@@ -1,20 +1,6 @@
package bio.terra.tanagra.annotation;
-import bio.terra.tanagra.underlay.serialization.SZAttribute;
-import bio.terra.tanagra.underlay.serialization.SZBigQuery;
-import bio.terra.tanagra.underlay.serialization.SZCorePlugin;
-import bio.terra.tanagra.underlay.serialization.SZCriteriaOccurrence;
-import bio.terra.tanagra.underlay.serialization.SZCriteriaSelector;
-import bio.terra.tanagra.underlay.serialization.SZDataType;
-import bio.terra.tanagra.underlay.serialization.SZEntity;
-import bio.terra.tanagra.underlay.serialization.SZGroupItems;
-import bio.terra.tanagra.underlay.serialization.SZHierarchy;
-import bio.terra.tanagra.underlay.serialization.SZIndexer;
-import bio.terra.tanagra.underlay.serialization.SZPrepackagedCriteria;
-import bio.terra.tanagra.underlay.serialization.SZService;
-import bio.terra.tanagra.underlay.serialization.SZTextSearch;
-import bio.terra.tanagra.underlay.serialization.SZUnderlay;
-import bio.terra.tanagra.underlay.serialization.SZVisualization;
+import bio.terra.tanagra.underlay.serialization.*;
import java.util.List;
@SuppressWarnings("PMD.CouplingBetweenObjects")
@@ -52,6 +38,7 @@ public class UnderlayConfigPath extends AnnotationPath {
SZCriteriaSelector.Display.class,
SZCriteriaSelector.Modifier.class,
SZPrepackagedCriteria.class,
+ SZRollupCountsSql.class,
SZVisualization.class,
SZCorePlugin.class);
diff --git a/docs/generated/UNDERLAY_CONFIG.md b/docs/generated/UNDERLAY_CONFIG.md
index 9f20df40b..9a4058c05 100644
--- a/docs/generated/UNDERLAY_CONFIG.md
+++ b/docs/generated/UNDERLAY_CONFIG.md
@@ -23,6 +23,7 @@ This documentation is generated from annotations in the configuration classes.
* [SZPrepackagedCriteria](#szprepackagedcriteria)
* [SZPrimaryCriteriaRelationship](#szprimarycriteriarelationship)
* [SZPrimaryRelationship](#szprimaryrelationship)
+* [SZRollupCountsSql](#szrollupcountssql)
* [SZService](#szservice)
* [SZSourceData](#szsourcedata)
* [SZSourceQuery](#szsourcequery)
@@ -642,7 +643,7 @@ Required if the [id pairs SQL](#szgroupitemsidpairssqlfile) is defined.
Name of the group entity - items entity id pairs SQL file.
-If this property is set, then the [id pairs SQL](#szgroupitemsidpairssqlfile) must be unset. File must be in the same directory as the entity group file. Name includes file extension.
+File must be in the same directory as the entity group file. Name includes file extension.
There can be other columns selected in the SQL file (e.g. `SELECT * FROM relationships`), but the group and items entity ids are required. If this property is set, then the [foreign key atttribute](#szgroupitemsforeignkeyattributeitemsentity) must be unset.
@@ -669,6 +670,11 @@ Name of the entity group.
This is the unique identifier for the entity group. In a single underlay, the entity group names of any group type cannot overlap. Name may not include spaces or special characters, only letters and numbers. The first character must be a letter.
+### SZGroupItems.rollupCountsSql
+**optional** [SZRollupCountsSql](#szrollupcountssql)
+
+Pointer to SQL that returns entity id - rollup count (= number of related entity instances) pairs.
+
### SZGroupItems.useSourceIdPairsSql
**optional** boolean
@@ -967,6 +973,36 @@ Name of the field or column name that maps to the primary entity id. Required if
+## SZRollupCountsSql
+Pointer to SQL that returns entity id - rollup count (= number of related entity instances) pairs (e.g. variant - number of people). Useful when there's an easy way to calculate these in SQL and we want to avoid ingesting the full entity - related entity relationship id pairs table into Dataflow.
+
+### SZRollupCountsSql.entityIdFieldName
+**required** String
+
+Name of the field or column name that maps to the entity id.
+
+*Example value:* `entity_id`
+
+### SZRollupCountsSql.rollupCountFieldName
+**required** String
+
+Name of the field or column name that maps to the rollup count per entity id.
+
+*Example value:* `rollup_count`
+
+### SZRollupCountsSql.rollupCountsSqlFile
+**required** String
+
+Name of the entity id - rollup counts (= number of items entity instances) pairs SQL file.
+
+File must be in the same directory as the entity/group file. Name includes file extension.
+
+There can be other columns selected in the SQL file (e.g. `SELECT * FROM relationships`), but the entity id and rollup count fields are required.
+
+*Example value:* `rollupCounts.sql`
+
+
+
## SZService
Service configuration.
diff --git a/indexer/src/main/java/bio/terra/tanagra/indexing/JobSequencer.java b/indexer/src/main/java/bio/terra/tanagra/indexing/JobSequencer.java
index 6a607e13e..ecd329b45 100644
--- a/indexer/src/main/java/bio/terra/tanagra/indexing/JobSequencer.java
+++ b/indexer/src/main/java/bio/terra/tanagra/indexing/JobSequencer.java
@@ -31,11 +31,7 @@
import bio.terra.tanagra.underlay.indextable.ITHierarchyChildParent;
import bio.terra.tanagra.underlay.indextable.ITRelationshipIdPairs;
import bio.terra.tanagra.underlay.serialization.SZIndexer;
-import bio.terra.tanagra.underlay.sourcetable.STEntityAttributes;
-import bio.terra.tanagra.underlay.sourcetable.STHierarchyChildParent;
-import bio.terra.tanagra.underlay.sourcetable.STHierarchyRootFilter;
-import bio.terra.tanagra.underlay.sourcetable.STRelationshipIdPairs;
-import bio.terra.tanagra.underlay.sourcetable.STTextSearchTerms;
+import bio.terra.tanagra.underlay.sourcetable.*;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
@@ -192,6 +188,14 @@ public static SequencedJobSet getJobSetForGroupItems(
groupItems.getGroupEntity().getName(),
groupItems.getItemsEntity().getName())
: null;
+ STRelationshipRollupCounts groupItemsRelationshipRollupCountsTable =
+ underlay
+ .getSourceSchema()
+ .getRelationshipRollupCounts(
+ groupItems.getName(),
+ groupItems.getGroupEntity().getName(),
+ groupItems.getItemsEntity().getName())
+ .orElse(null);
jobSet.addJob(
new WriteRollupCounts(
indexerConfig,
@@ -203,7 +207,8 @@ public static SequencedJobSet getJobSetForGroupItems(
itemsEntityIndexTable,
groupItemsIdPairsTable,
null,
- null));
+ null,
+ groupItemsRelationshipRollupCountsTable));
// If the criteria entity has hierarchies, then also compute the criteria rollup counts for each
// hierarchy.
@@ -228,7 +233,8 @@ public static SequencedJobSet getJobSetForGroupItems(
underlay
.getIndexSchema()
.getHierarchyAncestorDescendant(
- groupItems.getGroupEntity().getName(), hierarchy.getName()))));
+ groupItems.getGroupEntity().getName(), hierarchy.getName()),
+ null)));
}
if (groupItems.getGroupEntity().hasHierarchies()) {
@@ -378,6 +384,7 @@ public static SequencedJobSet getJobSetForCriteriaOccurrence(
primaryEntityIndexTable,
primaryCriteriaIdPairsTable,
null,
+ null,
null));
// If the criteria entity has hierarchies, then also compute the criteria rollup counts for each
@@ -404,7 +411,8 @@ public static SequencedJobSet getJobSetForCriteriaOccurrence(
.getIndexSchema()
.getHierarchyAncestorDescendant(
criteriaOccurrence.getCriteriaEntity().getName(),
- hierarchy.getName()))));
+ hierarchy.getName()),
+ null)));
}
// Compute instance-level display hints for the occurrence entity attributes that are flagged as
diff --git a/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/WriteRollupCounts.java b/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/WriteRollupCounts.java
index 136851088..1bf0ed4c9 100644
--- a/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/WriteRollupCounts.java
+++ b/indexer/src/main/java/bio/terra/tanagra/indexing/job/dataflow/WriteRollupCounts.java
@@ -17,6 +17,8 @@
import bio.terra.tanagra.underlay.indextable.ITHierarchyAncestorDescendant;
import bio.terra.tanagra.underlay.indextable.ITRelationshipIdPairs;
import bio.terra.tanagra.underlay.serialization.SZIndexer;
+import bio.terra.tanagra.underlay.sourcetable.*;
+import com.google.api.*;
import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
@@ -78,6 +80,7 @@ public class WriteRollupCounts extends BigQueryJob {
private final @Nullable ITRelationshipIdPairs relationshipIdPairsIndexTable;
private final @Nullable Hierarchy hierarchy;
private final @Nullable ITHierarchyAncestorDescendant ancestorDescendantTable;
+ private final @Nullable STRelationshipRollupCounts relationshipRollupCountsSourceTable;
@SuppressWarnings({"checkstyle:ParameterNumber", "PMD.ExcessiveParameterList"})
public WriteRollupCounts(
@@ -90,7 +93,8 @@ public WriteRollupCounts(
ITEntityMain countedEntityIndexTable,
@Nullable ITRelationshipIdPairs relationshipIdPairsIndexTable,
@Nullable Hierarchy hierarchy,
- @Nullable ITHierarchyAncestorDescendant ancestorDescendantTable) {
+ @Nullable ITHierarchyAncestorDescendant ancestorDescendantTable,
+ @Nullable STRelationshipRollupCounts relationshipRollupCountsSourceTable) {
super(indexerConfig);
this.entityGroup = entityGroup;
this.entity = entity;
@@ -101,6 +105,7 @@ public WriteRollupCounts(
this.relationshipIdPairsIndexTable = relationshipIdPairsIndexTable;
this.hierarchy = hierarchy;
this.ancestorDescendantTable = ancestorDescendantTable;
+ this.relationshipRollupCountsSourceTable = relationshipRollupCountsSourceTable;
}
@Override
@@ -136,16 +141,20 @@ && outputTableHasAtLeastOneRowWithNotNullField(
@Override
public void run(boolean isDryRun) {
- // Only run the Dataflow job if the temp table hasn't been written yet.
- Optional
tempTable =
- googleBigQuery.getTable(
- indexerConfig.bigQuery.indexData.projectId,
- indexerConfig.bigQuery.indexData.datasetId,
- getTempTableName());
- if (tempTable.isEmpty()) {
- writeFieldsToTempTable(isDryRun);
+ if (relationshipRollupCountsSourceTable == null) {
+ // Only run the Dataflow job if the temp table hasn't been written yet.
+ Optional tempTable =
+ googleBigQuery.getTable(
+ indexerConfig.bigQuery.indexData.projectId,
+ indexerConfig.bigQuery.indexData.datasetId,
+ getTempTableName());
+ if (tempTable.isEmpty()) {
+ writeFieldsToTempTable(isDryRun);
+ } else {
+ LOGGER.info("Temp table has already been written. Skipping Dataflow job.");
+ }
} else {
- LOGGER.info("Temp table has already been written. Skipping Dataflow job.");
+ LOGGER.info("Rollup counts source SQL is defined. Skipping Dataflow job.");
}
// Dataflow jobs can only write new rows to BigQuery, so in this second step, copy over the
@@ -353,21 +362,30 @@ private void copyFieldsToEntityTable(boolean isDryRun) {
indexTable.getEntityGroupCountField(
entityGroup.getName(), hierarchy == null ? null : hierarchy.getName());
- BQTable tempBQTable =
- new BQTable(
- indexerConfig.bigQuery.indexData.projectId,
- indexerConfig.bigQuery.indexData.datasetId,
- getTempTableName());
- SqlField tempTableIdField = SqlField.of(entityTableIdField.getColumnName());
- SqlField tempTableCountField = SqlField.of(entityTableCountField.getColumnName());
- String tempTableSql =
+ BQTable rollupCountsBQTable;
+ SqlField rollupCountsTableIdField;
+ SqlField rollupCountsTableCountField;
+ if (relationshipRollupCountsSourceTable == null) {
+ rollupCountsBQTable =
+ new BQTable(
+ indexerConfig.bigQuery.indexData.projectId,
+ indexerConfig.bigQuery.indexData.datasetId,
+ getTempTableName());
+ rollupCountsTableIdField = SqlField.of(entityTableIdField.getColumnName());
+ rollupCountsTableCountField = SqlField.of(entityTableCountField.getColumnName());
+ } else {
+ rollupCountsBQTable = relationshipRollupCountsSourceTable.getTablePointer();
+ rollupCountsTableIdField = relationshipRollupCountsSourceTable.getEntityIdField();
+ rollupCountsTableCountField = relationshipRollupCountsSourceTable.getCountField();
+ }
+ String rollupCountsTableSql =
"SELECT "
- + SqlQueryField.of(tempTableIdField).renderForSelect()
+ + SqlQueryField.of(rollupCountsTableIdField).renderForSelect()
+ ", "
- + SqlQueryField.of(tempTableCountField).renderForSelect()
+ + SqlQueryField.of(rollupCountsTableCountField).renderForSelect()
+ " FROM "
- + tempBQTable.render();
- LOGGER.info("temp table query: {}", tempTableSql);
+ + rollupCountsBQTable.render();
+ LOGGER.info("rollup counts table query: {}", rollupCountsTableSql);
// Build an update-from-select query for the index entity main table and the
// id-count query.
@@ -381,19 +399,19 @@ private void copyFieldsToEntityTable(boolean isDryRun) {
+ " SET "
+ SqlQueryField.of(entityTableCountField).renderForSelect(updateTableAlias)
+ " = "
- + SqlQueryField.of(tempTableCountField).renderForSelect(tempTableAlias)
+ + SqlQueryField.of(rollupCountsTableCountField).renderForSelect(tempTableAlias)
+ " FROM (SELECT "
- + SqlQueryField.of(tempTableCountField).renderForSelect()
+ + SqlQueryField.of(rollupCountsTableCountField).renderForSelect()
+ ", "
- + SqlQueryField.of(tempTableIdField).renderForSelect()
+ + SqlQueryField.of(rollupCountsTableIdField).renderForSelect()
+ " FROM "
- + tempBQTable.render()
+ + rollupCountsBQTable.render()
+ ") AS "
+ tempTableAlias
+ " WHERE "
+ SqlQueryField.of(entityTableIdField).renderForSelect(updateTableAlias)
+ " = "
- + SqlQueryField.of(tempTableIdField).renderForSelect(tempTableAlias);
+ + SqlQueryField.of(rollupCountsTableIdField).renderForSelect(tempTableAlias);
LOGGER.info("update-from-select query: {}", updateFromSelectSql);
// Run the update-from-select to write the count field in the index entity main table.
diff --git a/ui/src/tanagra-underlay/underlayConfig.ts b/ui/src/tanagra-underlay/underlayConfig.ts
index 6e5f58f62..5a7a1ccc2 100644
--- a/ui/src/tanagra-underlay/underlayConfig.ts
+++ b/ui/src/tanagra-underlay/underlayConfig.ts
@@ -114,6 +114,7 @@ export type SZGroupItems = {
itemsEntity: string;
itemsEntityIdFieldName?: string;
name: string;
+ rollupCountsSql?: SZRollupCountsSql;
useSourceIdPairsSql?: boolean;
};
@@ -176,6 +177,12 @@ export type SZPrimaryRelationship = {
primaryEntityIdFieldName?: string;
};
+export type SZRollupCountsSql = {
+ entityIdFieldName: string;
+ rollupCountFieldName: string;
+ sqlFile: string;
+};
+
export type SZService = {
bigQuery: SZBigQuery;
underlay: string;
diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/SourceSchema.java b/underlay/src/main/java/bio/terra/tanagra/underlay/SourceSchema.java
index 731dfba04..0d698e637 100644
--- a/underlay/src/main/java/bio/terra/tanagra/underlay/SourceSchema.java
+++ b/underlay/src/main/java/bio/terra/tanagra/underlay/SourceSchema.java
@@ -6,15 +6,10 @@
import bio.terra.tanagra.underlay.serialization.SZEntity;
import bio.terra.tanagra.underlay.serialization.SZGroupItems;
import bio.terra.tanagra.underlay.serialization.SZUnderlay;
-import bio.terra.tanagra.underlay.sourcetable.STEntityAttributes;
-import bio.terra.tanagra.underlay.sourcetable.STHierarchyChildParent;
-import bio.terra.tanagra.underlay.sourcetable.STHierarchyRootFilter;
-import bio.terra.tanagra.underlay.sourcetable.STRelationshipIdPairs;
-import bio.terra.tanagra.underlay.sourcetable.STTextSearchTerms;
+import bio.terra.tanagra.underlay.sourcetable.*;
import com.google.common.collect.ImmutableList;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
-import java.util.ArrayList;
-import java.util.List;
+import java.util.*;
@SuppressFBWarnings(
value = "NP_UNWRITTEN_PUBLIC_OR_PROTECTED_FIELD",
@@ -26,18 +21,21 @@ public final class SourceSchema {
private final ImmutableList hierarchyChildParentTables;
private final ImmutableList hierarchyRootFilterTables;
private final ImmutableList relationshipIdPairTables;
+ private final ImmutableList relationshipRollupCountTables;
private SourceSchema(
List entityAttributesTables,
List textSearchTermsTables,
List hierarchyChildParentTables,
List hierarchyRootFilterTables,
- List relationshipIdPairTables) {
+ List relationshipIdPairTables,
+ List relationshipRollupCountTables) {
this.entityAttributesTables = ImmutableList.copyOf(entityAttributesTables);
this.textSearchTermsTables = ImmutableList.copyOf(textSearchTermsTables);
this.hierarchyChildParentTables = ImmutableList.copyOf(hierarchyChildParentTables);
this.hierarchyRootFilterTables = ImmutableList.copyOf(hierarchyRootFilterTables);
this.relationshipIdPairTables = ImmutableList.copyOf(relationshipIdPairTables);
+ this.relationshipRollupCountTables = ImmutableList.copyOf(relationshipRollupCountTables);
}
public STEntityAttributes getEntityAttributes(String entity) {
@@ -99,6 +97,17 @@ public STRelationshipIdPairs getRelationshipIdPairs(
.orElseThrow();
}
+ public Optional getRelationshipRollupCounts(
+ String entityGroup, String entity, String countedEntity) {
+ return relationshipRollupCountTables.stream()
+ .filter(
+ relationshipRollupCounts ->
+ relationshipRollupCounts.getEntityGroup().equals(entityGroup)
+ && relationshipRollupCounts.getEntity().equals(entity)
+ && relationshipRollupCounts.getCountedEntity().equals(countedEntity))
+ .findFirst();
+ }
+
public static SourceSchema fromConfig(
SZBigQuery szBigQuery, SZUnderlay szUnderlay, ConfigReader configReader) {
List entityAttributesTables = new ArrayList<>();
@@ -106,6 +115,7 @@ public static SourceSchema fromConfig(
List hierarchyChildParentTables = new ArrayList<>();
List hierarchyRootFilterTables = new ArrayList<>();
List relationshipIdPairTables = new ArrayList<>();
+ List relationshipRollupCountTables = new ArrayList<>();
// Build source tables for each entity.
szUnderlay.entities.forEach(
@@ -121,7 +131,11 @@ public static SourceSchema fromConfig(
// Build source tables for each entity group.
szUnderlay.groupItemsEntityGroups.forEach(
groupItemsPath ->
- fromConfigGroupItems(groupItemsPath, configReader, relationshipIdPairTables));
+ fromConfigGroupItems(
+ groupItemsPath,
+ configReader,
+ relationshipIdPairTables,
+ relationshipRollupCountTables));
szUnderlay.criteriaOccurrenceEntityGroups.forEach(
criteriaOccurrencePath ->
fromConfigCriteriaOccurrence(
@@ -134,7 +148,8 @@ public static SourceSchema fromConfig(
textSearchTermsTables,
hierarchyChildParentTables,
hierarchyRootFilterTables,
- relationshipIdPairTables);
+ relationshipIdPairTables,
+ relationshipRollupCountTables);
}
private static void fromConfigEntity(
@@ -184,7 +199,8 @@ private static void fromConfigEntity(
private static void fromConfigGroupItems(
String groupItemsPath,
ConfigReader configReader,
- List relationshipIdPairTables) {
+ List relationshipIdPairTables,
+ List relationshipRollupCountTables) {
SZGroupItems szGroupItems = configReader.readGroupItems(groupItemsPath);
if (szGroupItems.idPairsSqlFile != null) {
// RelationshipIdPairs table.
@@ -200,6 +216,20 @@ private static void fromConfigGroupItems(
szGroupItems.groupEntityIdFieldName,
szGroupItems.itemsEntityIdFieldName));
}
+ if (szGroupItems.rollupCountsSql != null) {
+ // RelationshipRollupCounts table.
+ String rollupCountsSql =
+ configReader.readEntityGroupSql(groupItemsPath, szGroupItems.rollupCountsSql.sqlFile);
+ BQTable rollupCountsTable = new BQTable(rollupCountsSql);
+ relationshipRollupCountTables.add(
+ new STRelationshipRollupCounts(
+ rollupCountsTable,
+ szGroupItems.name,
+ szGroupItems.groupEntity,
+ szGroupItems.itemsEntity,
+ szGroupItems.rollupCountsSql.entityIdFieldName,
+ szGroupItems.rollupCountsSql.rollupCountFieldName));
+ }
}
private static void fromConfigCriteriaOccurrence(
diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZGroupItems.java b/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZGroupItems.java
index ae996fcda..a1c01c647 100644
--- a/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZGroupItems.java
+++ b/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZGroupItems.java
@@ -38,7 +38,6 @@ public class SZGroupItems {
name = "SZGroupItems.idPairsSqlFile",
markdown =
"Name of the group entity - items entity id pairs SQL file.\n\n"
- + "If this property is set, then the [id pairs SQL](${SZGroupItems.idPairsSqlFile}) must be unset. "
+ "File must be in the same directory as the entity group file. Name includes file extension.\n\n"
+ "There can be other columns selected in the SQL file (e.g. `SELECT * FROM relationships`), but the "
+ "group and items entity ids are required. If this property is set, then the "
@@ -73,4 +72,11 @@ public class SZGroupItems {
optional = true,
exampleValue = "items_id")
public String itemsEntityIdFieldName;
+
+ @AnnotatedField(
+ name = "SZGroupItems.rollupCountsSql",
+ markdown =
+ "Pointer to SQL that returns entity id - rollup count (= number of related entity instances) pairs.",
+ optional = true)
+ public SZRollupCountsSql rollupCountsSql;
}
diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZRollupCountsSql.java b/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZRollupCountsSql.java
new file mode 100644
index 000000000..7bccd23ed
--- /dev/null
+++ b/underlay/src/main/java/bio/terra/tanagra/underlay/serialization/SZRollupCountsSql.java
@@ -0,0 +1,33 @@
+package bio.terra.tanagra.underlay.serialization;
+
+import bio.terra.tanagra.annotation.*;
+
+@AnnotatedClass(
+ name = "SZRollupCountsSql",
+ markdown =
+ "Pointer to SQL that returns entity id - rollup count (= number of related entity instances) pairs "
+ + "(e.g. variant - number of people). Useful when there's an easy way to calculate these in SQL "
+ + "and we want to avoid ingesting the full entity - related entity relationship id pairs table into Dataflow.")
+public class SZRollupCountsSql {
+ @AnnotatedField(
+ name = "SZRollupCountsSql.rollupCountsSqlFile",
+ markdown =
+ "Name of the entity id - rollup counts (= number of items entity instances) pairs SQL file.\n\n"
+ + "File must be in the same directory as the entity/group file. Name includes file extension.\n\n"
+ + "There can be other columns selected in the SQL file (e.g. `SELECT * FROM relationships`), but the "
+ + "entity id and rollup count fields are required.",
+ exampleValue = "rollupCounts.sql")
+ public String sqlFile;
+
+ @AnnotatedField(
+ name = "SZRollupCountsSql.entityIdFieldName",
+ markdown = "Name of the field or column name that maps to the entity id.",
+ exampleValue = "entity_id")
+ public String entityIdFieldName;
+
+ @AnnotatedField(
+ name = "SZRollupCountsSql.rollupCountFieldName",
+ markdown = "Name of the field or column name that maps to the rollup count per entity id.",
+ exampleValue = "rollup_count")
+ public String rollupCountFieldName;
+}
diff --git a/underlay/src/main/java/bio/terra/tanagra/underlay/sourcetable/STRelationshipRollupCounts.java b/underlay/src/main/java/bio/terra/tanagra/underlay/sourcetable/STRelationshipRollupCounts.java
new file mode 100644
index 000000000..2d3c2150a
--- /dev/null
+++ b/underlay/src/main/java/bio/terra/tanagra/underlay/sourcetable/STRelationshipRollupCounts.java
@@ -0,0 +1,55 @@
+package bio.terra.tanagra.underlay.sourcetable;
+
+import bio.terra.tanagra.api.shared.DataType;
+import bio.terra.tanagra.query.bigquery.BQTable;
+import bio.terra.tanagra.query.sql.SqlField;
+import bio.terra.tanagra.underlay.ColumnSchema;
+import com.google.common.collect.ImmutableList;
+
+public class STRelationshipRollupCounts extends SourceTable {
+ private final String entityGroup;
+ private final String entity;
+ private final String countedEntity;
+ private final ColumnSchema entityIdColumnSchema;
+ private final ColumnSchema countColumnSchema;
+
+ public STRelationshipRollupCounts(
+ BQTable bqTable,
+ String entityGroup,
+ String entity,
+ String countedEntity,
+ String entityIdFieldName,
+ String countFieldName) {
+ super(bqTable);
+ this.entityGroup = entityGroup;
+ this.entity = entity;
+ this.countedEntity = countedEntity;
+ this.entityIdColumnSchema = new ColumnSchema(entityIdFieldName, DataType.INT64);
+ this.countColumnSchema = new ColumnSchema(countFieldName, DataType.INT64);
+ }
+
+ @Override
+ public ImmutableList getColumnSchemas() {
+ return ImmutableList.of(entityIdColumnSchema, countColumnSchema);
+ }
+
+ public String getEntityGroup() {
+ return entityGroup;
+ }
+
+ public String getEntity() {
+ return entity;
+ }
+
+ public String getCountedEntity() {
+ return countedEntity;
+ }
+
+ public SqlField getEntityIdField() {
+ return SqlField.of(entityIdColumnSchema.getColumnName());
+ }
+
+ public SqlField getCountField() {
+ return SqlField.of(countColumnSchema.getColumnName());
+ }
+}
diff --git a/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/entityGroup.json b/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/entityGroup.json
index de5cdf420..752cc988f 100644
--- a/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/entityGroup.json
+++ b/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/entityGroup.json
@@ -5,5 +5,10 @@
"idPairsSqlFile": "idPairs.sql",
"useSourceIdPairsSql": true,
"groupEntityIdFieldName": "variant_row_num",
- "itemsEntityIdFieldName": "flattened_person_id"
+ "itemsEntityIdFieldName": "flattened_person_id",
+ "rollupCountsSql": {
+ "sqlFile": "rollupCounts.sql",
+ "entityIdFieldName": "variant_row_num",
+ "rollupCountFieldName": "num_persons"
+ }
}
diff --git a/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/idPairs.sql b/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/idPairs.sql
index 418cfc020..665a86eca 100644
--- a/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/idPairs.sql
+++ b/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/idPairs.sql
@@ -1,5 +1,6 @@
SELECT v.variant_row_num, flattened_person_id
-FROM `${omopDataset}.variant_to_person` AS vtop
+/* Wrap variant_to_person table in a SELECT DISTINCT because there is a duplicate row in the test data. */
+FROM (SELECT DISTINCT vid, person_ids FROM `${omopDataset}.variant_to_person`) AS vtop
JOIN
(SELECT ROW_NUMBER() OVER (ORDER BY vid) AS variant_row_num, vid FROM `${omopDataset}.prep_vat`)
AS v ON v.vid = vtop.vid
diff --git a/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/rollupCounts.sql b/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/rollupCounts.sql
new file mode 100644
index 000000000..64d2f0ae3
--- /dev/null
+++ b/underlay/src/main/resources/config/datamapping/aouCT_testonly/entitygroup/variantPerson/rollupCounts.sql
@@ -0,0 +1,6 @@
+SELECT v.variant_row_num, ARRAY_LENGTH(vtop.person_ids) AS num_persons
+/* Wrap variant_to_person table in a SELECT DISTINCT because there is a duplicate row in the test data. */
+FROM (SELECT DISTINCT vid, person_ids FROM `${omopDataset}.variant_to_person`) AS vtop
+JOIN
+ (SELECT ROW_NUMBER() OVER (ORDER BY vid) AS variant_row_num, vid FROM `${omopDataset}.prep_vat`)
+ AS v ON v.vid = vtop.vid