Skip to content

Commit

Permalink
List instead of IndexedSeq
Browse files Browse the repository at this point in the history
  • Loading branch information
davidangb committed Dec 17, 2024
1 parent 119fc09 commit 5a22ef9
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ object TsvFormatterBenchmark {

val model: ModelSchema = FlexibleModelSchema

val headers: IndexedSeq[String] = IndexedSeq("sample_id", "col1", "col2", "fourth", "last")
val headers: List[String] = List("sample_id", "col1", "col2", "fourth", "last")

val entities: Seq[Entity] = Seq(
Entity(
Expand Down Expand Up @@ -60,7 +60,7 @@ object TsvFormatterBenchmark {
class TsvFormatterBenchmark {

@Benchmark
def makeEntityRows(blackHole: Blackhole, entityData: EntityData): IndexedSeq[IndexedSeq[String]] = {
def makeEntityRows(blackHole: Blackhole, entityData: EntityData): List[List[String]] = {
val result =
TSVFormatter.makeEntityRows(entityData.entityType, entityData.entities, entityData.headers)(entityData.model)
blackHole.consume(result)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ case class ExportEntitiesByTypeArguments(
workspaceNamespace: String,
workspaceName: String,
entityType: String,
attributeNames: Option[IndexedSeq[String]],
attributeNames: Option[List[String]],
model: Option[String]
)

Expand Down Expand Up @@ -80,7 +80,7 @@ class ExportEntitiesByTypeActor(rawlsDAO: RawlsDAO,
workspaceNamespace: String,
workspaceName: String,
entityType: String,
attributeNames: Option[IndexedSeq[String]],
attributeNames: Option[List[String]],
model: Option[String],
argSystem: ActorSystem
)(implicit protected val executionContext: ExecutionContext)
Expand Down Expand Up @@ -213,7 +213,7 @@ class ExportEntitiesByTypeActor(rawlsDAO: RawlsDAO,

private def streamSingularType(entityQueries: Seq[EntityQuery],
metadata: EntityTypeMetadata,
entityHeaders: IndexedSeq[String]
entityHeaders: List[String]
): Future[File] = {
val tempEntityFile: File = File.newTemporaryFile(prefix = entityType)
val entitySink: Sink[ByteString, Future[IOResult]] = FileIO.toPath(tempEntityFile.path)
Expand Down Expand Up @@ -275,9 +275,9 @@ class ExportEntitiesByTypeActor(rawlsDAO: RawlsDAO,
val membershipSink: Sink[ByteString, Future[IOResult]] = FileIO.toPath(tempMembershipFile.path)

// Headers
val entityHeaders: IndexedSeq[String] =
val entityHeaders: List[String] =
TSVFormatter.makeEntityHeaders(entityType, metadata.attributeNames, attributeNames)
val membershipHeaders: IndexedSeq[String] = TSVFormatter.makeMembershipHeaders(entityType)
val membershipHeaders: List[String] = TSVFormatter.makeMembershipHeaders(entityType)

// Run the Split Entity Flow that pipes entities through the two flows to the two file sinks
// Result of this will be a tuple of Future[IOResult] that represents the success or failure of
Expand Down Expand Up @@ -432,7 +432,7 @@ class ExportEntitiesByTypeActor(rawlsDAO: RawlsDAO,
logger.info(s"completed pairing; result is ${pairs.length} rows")

// TSV headers
val entityHeaders: IndexedSeq[String] = IndexedSeq(s"entity:${entityType}_id", read1Name, read2Name)
val entityHeaders: List[String] = List(s"entity:${entityType}_id", read1Name, read2Name)

// transform the matched pairs into entities
val entities: List[Entity] = pairs.map {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package org.broadinstitute.dsde.firecloud.utils

import com.google.common.annotations.VisibleForTesting
import org.broadinstitute.dsde.rawls.model._
import org.broadinstitute.dsde.firecloud.model._
import org.broadinstitute.dsde.firecloud.service.TsvTypes

import scala.collection.LinearSeq

object TSVFormatter {

// for serializing entity references
Expand All @@ -12,11 +15,12 @@ object TSVFormatter {
/**
* Generate file content from headers and rows.
*
* @param headers IndexedSeq of header string values
* @param rows IndexedSeq of rows, each row an IndexedSeq of string values
* @param headers List of header string values
* @param rows List of rows, each row a List of string values
* @return Headers and rows combined.
*/
def exportToString(headers: IndexedSeq[String], rows: IndexedSeq[IndexedSeq[String]]): String = {
@VisibleForTesting
def exportToString(headers: List[String], rows: List[List[String]]): String = {
val headerString: String = headers.mkString("\t") + "\n"
val rowsString: String = rows.map(_.mkString("\t")).mkString("\n")
headerString + rowsString + "\n"
Expand All @@ -40,12 +44,12 @@ object TSVFormatter {
* Generate a row of values in the same order as the headers.
*
* @param entity The Entity object to extract data from
* @param headerAttributes List of ordered header values to determine order of values
* @return IndexedSeq of ordered data fields
* @param headerAttributes ordered header values to determine order of values
* @return ordered data fields
*/
private def makeRow(entity: Entity, headerAttributes: IndexedSeq[AttributeName]): IndexedSeq[String] =
private def makeRow(entity: Entity, headerAttributes: List[AttributeName]): List[String] =
// first column of the TSV is always the entity name
IndexedSeq(tsvSafeString(entity.name)) ++
List(tsvSafeString(entity.name)) ++
// remainder of columns are attributes of the entity, or "" if not found on this entity
headerAttributes.tail.map { colname =>
entity.attributes.get(colname) match {
Expand Down Expand Up @@ -89,11 +93,11 @@ object TSVFormatter {
* Generate a header for a membership file.
*
* @param entityType The EntityType
* @return IndexedSeq of header Strings
* @return ordered header Strings
*/
def makeMembershipHeaders(entityType: String)(implicit modelSchema: ModelSchema): IndexedSeq[String] =
IndexedSeq[String](s"${TsvTypes.MEMBERSHIP}:${entityType}_id",
modelSchema.getCollectionMemberType(entityType).get.getOrElse(entityType.replace("_set", ""))
def makeMembershipHeaders(entityType: String)(implicit modelSchema: ModelSchema): List[String] =
List[String](s"${TsvTypes.MEMBERSHIP}:${entityType}_id",
modelSchema.getCollectionMemberType(entityType).get.getOrElse(entityType.replace("_set", ""))
)

/**
Expand All @@ -105,9 +109,9 @@ object TSVFormatter {
*/
def makeMembershipRows(entityType: String, entities: Seq[Entity])(implicit
modelSchema: ModelSchema
): Seq[IndexedSeq[String]] = {
): List[List[String]] = {
val memberPlural = pluralizeMemberType(memberTypeFromEntityType(entityType, modelSchema), modelSchema)
entities
entities.toList
.filter {
_.entityType == entityType
}
Expand All @@ -120,10 +124,10 @@ object TSVFormatter {
}
.flatMap {
case (_, AttributeEntityReference(`entityType`, entityName)) =>
Seq(IndexedSeq[String](entity.name, entityName))
List(List[String](entity.name, entityName))
case (_, AttributeEntityReferenceList(refs)) =>
refs.map(ref => IndexedSeq[String](entity.name, ref.entityName))
case _ => Seq.empty
refs.toList.map(ref => List[String](entity.name, ref.entityName))
case _ => List.empty
}
}
}
Expand All @@ -136,9 +140,9 @@ object TSVFormatter {
* @param requestedHeaders Which, if any, columns were requested. If none, return allHeaders (subject to sanitization)
* @return Entity name as first column header, followed by matching entity attribute labels
*/
def makeEntityHeaders(entityType: String, allHeaders: Seq[String], requestedHeaders: Option[IndexedSeq[String]])(
implicit modelSchema: ModelSchema
): IndexedSeq[String] = {
def makeEntityHeaders(entityType: String, allHeaders: Seq[String], requestedHeaders: Option[List[String]])(implicit
modelSchema: ModelSchema
): List[String] = {
// will throw exception if firecloud model was requested and the entity type
val memberPlural = pluralizeMemberType(memberTypeFromEntityType(entityType, modelSchema), modelSchema)

Expand Down Expand Up @@ -169,7 +173,7 @@ object TSVFormatter {
s"${TsvTypes.UPDATE}:${entityType}_id"
case _ => s"${TsvTypes.ENTITY}:${entityType}_id"
}
(entityHeader +: requestedHeadersSansId.getOrElse(filteredAllHeaders)).toIndexedSeq
(entityHeader +: requestedHeadersSansId.getOrElse(filteredAllHeaders)).toList
}

/**
Expand All @@ -180,9 +184,9 @@ object TSVFormatter {
* @param headers The universe of available column headers
* @return Ordered list of rows, each row entry value ordered by its corresponding header position
*/
def makeEntityRows(entityType: String, entities: Seq[Entity], headers: IndexedSeq[String])(implicit
def makeEntityRows(entityType: String, entities: Seq[Entity], headers: List[String])(implicit
modelSchema: ModelSchema
): IndexedSeq[IndexedSeq[String]] = {
): List[List[String]] = {
// if we have a set entity, we need to filter out the attribute array of the members so that we only
// have top-level attributes to construct columns from.
val filteredEntities = if (modelSchema.isCollectionType(entityType)) {
Expand All @@ -193,13 +197,12 @@ object TSVFormatter {
}

// headers as AttributeNames
val headerAttributes: IndexedSeq[AttributeName] = headers.map(AttributeName.fromDelimitedName)
val headerAttributes: List[AttributeName] = headers.map(AttributeName.fromDelimitedName)

// Turn them into rows
filteredEntities
filteredEntities.toList
.filter(_.entityType == entityType)
.map(entity => makeRow(entity, headerAttributes))
.toIndexedSeq
}

def memberTypeFromEntityType(entityType: String, modelSchema: ModelSchema): String =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ trait CookieAuthedApiService extends Directives with RequestBuilding with LazyLo
post {
formFields(Symbol("FCtoken"), Symbol("attributeNames").?, Symbol("model").?) {
(tokenValue, attributeNamesString, modelString) =>
val attributeNames = attributeNamesString.map(_.split(",").toIndexedSeq)
val attributeNames = attributeNamesString.map(_.split(",").toList)
val userInfo = dummyUserInfo(tokenValue)
val exportArgs = ExportEntitiesByTypeArguments(userInfo,
workspaceNamespace,
Expand All @@ -50,7 +50,7 @@ trait CookieAuthedApiService extends Directives with RequestBuilding with LazyLo
get {
cookie("FCtoken") { tokenCookie =>
parameters(Symbol("attributeNames").?, Symbol("model").?) { (attributeNamesString, modelString) =>
val attributeNames = attributeNamesString.map(_.split(",").toIndexedSeq)
val attributeNames = attributeNamesString.map(_.split(",").toList)
val userInfo = dummyUserInfo(tokenCookie.value)
val exportArgs = ExportEntitiesByTypeArguments(userInfo,
workspaceNamespace,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ trait ExportEntitiesApiService
requireUserInfo() { userInfo =>
get {
parameters(Symbol("attributeNames").?, Symbol("model").?) { (attributeNamesString, modelString) =>
val attributeNames = attributeNamesString.map(_.split(",").toIndexedSeq)
val attributeNames = attributeNamesString.map(_.split(",").toList)
val exportArgs = ExportEntitiesByTypeArguments(userInfo,
workspaceNamespace,
workspaceName,
Expand All @@ -50,7 +50,7 @@ trait ExportEntitiesApiService
} ~
post {
formFields(Symbol("attributeNames").?, Symbol("model").?) { (attributeNamesString, modelString) =>
val attributeNames = attributeNamesString.map(_.split(",").toIndexedSeq)
val attributeNames = attributeNamesString.map(_.split(",").toList)
val model = if (modelString.nonEmpty && StringUtils.isBlank(modelString.get)) None else modelString
val exportArgs = ExportEntitiesByTypeArguments(userInfo,
workspaceNamespace,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class TSVFormatterSpec extends AnyFreeSpec with ScalaFutures with Matchers with
)
results.head should be("entity:sample_id")

val results2 = testEntityDataSet("sample", sampleList, Option(IndexedSeq.empty))
val results2 = testEntityDataSet("sample", sampleList, Option(List.empty))
results2 should contain theSameElementsAs Seq("entity:sample_id",
"sample_type",
"header_1",
Expand All @@ -92,7 +92,7 @@ class TSVFormatterSpec extends AnyFreeSpec with ScalaFutures with Matchers with
)
results2.head should be("entity:sample_id")

val results3 = testEntityDataSet("sample", sampleList, Option(IndexedSeq("")))
val results3 = testEntityDataSet("sample", sampleList, Option(List("")))
results3 should contain theSameElementsAs Seq("entity:sample_id",
"sample_type",
"header_1",
Expand All @@ -102,10 +102,10 @@ class TSVFormatterSpec extends AnyFreeSpec with ScalaFutures with Matchers with
results3.head should be("entity:sample_id")

Seq(
IndexedSeq("header_2", "does_not_exist", "header_1"),
IndexedSeq("header_2", "sample_id", "header_1"),
IndexedSeq("header_1", "header_2"),
IndexedSeq("header_1")
List("header_2", "does_not_exist", "header_1"),
List("header_2", "sample_id", "header_1"),
List("header_1", "header_2"),
List("header_1")
).foreach { requestedHeaders =>
val resultsWithSpecificHeaders =
testEntityDataSet("sample", sampleList, Option(requestedHeaders), TsvTypes.UPDATE)
Expand All @@ -115,7 +115,7 @@ class TSVFormatterSpec extends AnyFreeSpec with ScalaFutures with Matchers with

testEntityDataSet("sample",
sampleList,
Option(IndexedSeq("participant"))
Option(List("participant"))
) should contain theSameElementsInOrderAs Seq("entity:sample_id", "participant")

}
Expand Down Expand Up @@ -287,7 +287,7 @@ class TSVFormatterSpec extends AnyFreeSpec with ScalaFutures with Matchers with

private def testEntityDataSet(entityType: String,
entities: List[Entity],
requestedHeaders: Option[IndexedSeq[String]],
requestedHeaders: Option[List[String]],
tsvType: TsvType = TsvTypes.ENTITY
) = {

Expand Down Expand Up @@ -323,7 +323,7 @@ class TSVFormatterSpec extends AnyFreeSpec with ScalaFutures with Matchers with
): Unit = {
val tsvHeaders = TSVFormatter.makeMembershipHeaders(entityType)
val tsvRows = TSVFormatter.makeMembershipRows(entityType, entities)
val tsv = TSVFormatter.exportToString(tsvHeaders, tsvRows.toIndexedSeq)
val tsv = TSVFormatter.exportToString(tsvHeaders, tsvRows)
tsv shouldNot be(empty)

val lines: List[String] = Source.fromString(tsv).getLines().toList
Expand Down

0 comments on commit 5a22ef9

Please sign in to comment.