diff --git a/docs/src/main/scala/entry.scala.md b/docs/src/main/scala/entry.scala.md index fc3340f..1e2c532 100644 --- a/docs/src/main/scala/entry.scala.md +++ b/docs/src/main/scala/entry.scala.md @@ -198,9 +198,9 @@ http://web.expasy.org/docs/userman.html#DR_line ```scala case class DatabaseCrossReference( val resource : ResourceAbbreviation, - val identifier : String, - val otherInformation : Option[String], - val isoformID : Option[String] + val identifier : String + // val otherInformation : Option[String], + // val isoformID : Option[String] ) ``` @@ -231,6 +231,7 @@ sealed trait ResourceAbbreviation { case object CollecTF extends ResourceAbbreviation { val description: String = "CollecTF database of bacterial transcription factor binding sites" } case object ConoServer extends ResourceAbbreviation { val description: String = "ConoServer: ConeCone snail toxin database" } case object CTD extends ResourceAbbreviation { val description: String = "Comparative Toxicogenomics Database" } + case object DEPOD extends ResourceAbbreviation { val description: String = "DEPOD human dephosphorylation database" } case object dictyBase extends ResourceAbbreviation { val description: String = "Dictyostelium discoideum online informatics resource" } case object DIP extends ResourceAbbreviation { val description: String = "Database of interacting proteins" } case object DMDM extends ResourceAbbreviation { val description: String = "Domain mapping of disease mutations" } @@ -289,6 +290,7 @@ sealed trait ResourceAbbreviation { case object MIM extends ResourceAbbreviation { val description: String = "Mendelian Inheritance in Man Database (MIM)" } case object MINT extends ResourceAbbreviation { val description: String = "Molecular INTeraction database" } case object mycoCLAP extends ResourceAbbreviation { val description: String = "mycoCLAP" } + case object MoonProt extends ResourceAbbreviation { val description: String = "MoonProt database of moonlighting proteins" } case object 
neXtProt extends ResourceAbbreviation { val description: String = "neXtProt, the human protein knowledge platform" } case object OGP extends ResourceAbbreviation { val description: String = "Oxford GlycoProteomics 2-DE database (OGP)" } case object OMA extends ResourceAbbreviation { val description: String = "Identification of Orthologs from Complete Genome Data" } @@ -316,6 +318,7 @@ sealed trait ResourceAbbreviation { case object ProMEX extends ResourceAbbreviation { val description: String = "Protein Mass spectra EXtraction database" } case object PROSITE extends ResourceAbbreviation { val description: String = "PROSITE protein domain and family database (see 3.25.126)" } case object ProteinModelPortal extends ResourceAbbreviation { val description: String = "Protein Model Portal, a module of the Protein Structure Initiative Knowledgebase (PSI KB) to unify the model data from the different sites." } + case object Proteomes extends ResourceAbbreviation { val description: String = "UniProt Proteomes" 
} case object PseudoCAP extends ResourceAbbreviation { val description: String = "Pseudomonas aeruginosa Community Annotation Project" } case object Reactome extends ResourceAbbreviation { val description: String = "Curated resource of core pathways and reactions in human biology (Reactome)" } case object REBASE extends ResourceAbbreviation { val description: String = "Restriction enzymes and methylases database (REBASE)" } @@ -330,7 +333,7 @@ sealed trait ResourceAbbreviation { case object SMR extends ResourceAbbreviation { val description: String = "The SWISS-MODEL Repository (SMR)" } case object STRING extends ResourceAbbreviation { val description: String = "STRING: functional protein association networks" } case object SUPFAM extends ResourceAbbreviation { val description: String = "Superfamily database of structural and functional annotation" } - case object SWISS extends ResourceAbbreviation { val description: String = "-2DPAGE 2D-PAGE database from the Geneva University Hospital (SWISS-2DPAGE)" } + case object `SWISS-2DPAGE` extends ResourceAbbreviation { val description: String = "2D-PAGE database from the Geneva University Hospital (SWISS-2DPAGE)" } case object SwissLipids extends ResourceAbbreviation { val description: String = "SwissLipids knowledge resource for lipid biology" } case object SwissPalm extends ResourceAbbreviation { val description: String = "SwissPalm database of S-palmitoylation events" } case object TAIR extends ResourceAbbreviation { val description: String = "The Arabidopsis Information Resource (TAIR)" } @@ -353,147 +356,151 @@ sealed trait ResourceAbbreviation { case object ResourceAbbreviation { + @inline def fromString(rep: String): ResourceAbbreviation = rep match { - case EMBL.asString => EMBL - case Allergome.asString => Allergome - case ArachnoServer.asString => ArachnoServer - case Bgee.asString => Bgee - case BindingDB.asString => BindingDB - case BioCyc.asString => BioCyc - case BioGrid.asString => BioGrid - case 
BioMuta.asString => BioMuta - case BRENDA.asString => BRENDA - case CAZy.asString => CAZy - case CCDS.asString => CCDS - case CDD.asString => CDD - case ChEMBL.asString => ChEMBL - case ChiTaRS.asString => ChiTaRS - case CGD.asString => CGD - case CleanEx.asString => CleanEx - case `COMPLUYEAST-2DPAGE`.asString => `COMPLUYEAST-2DPAGE` - case CollecTF.asString => CollecTF - case ConoServer.asString => ConoServer - case CTD.asString => CTD - case dictyBase.asString => dictyBase - case DIP.asString => DIP - case DMDM.asString => DMDM - case DNASU.asString => DNASU - case `DOSAC-COBS-2DPAGE`.asString => `DOSAC-COBS-2DPAGE` - case DisProt.asString => DisProt - case DrugBank.asString => DrugBank - case EchoBASE.asString => EchoBASE - case EcoGene.asString => EcoGene - case eggNOG.asString => eggNOG - case Ensembl.asString => Ensembl - case EnsemblBacteria.asString => EnsemblBacteria - case EnsemblFungi.asString => EnsemblFungi - case EnsemblMetazoa.asString => EnsemblMetazoa - case EnsemblPlants.asString => EnsemblPlants - case EnsemblProtists.asString => EnsemblProtists - case EPD.asString => EPD - case ESTHER.asString => ESTHER - case euHCVdb.asString => euHCVdb - case EuPathDB.asString => EuPathDB - case EvolutionaryTrace.asString => EvolutionaryTrace - case ExpressionAtlas.asString => ExpressionAtlas - case FlyBase.asString => FlyBase - case Gene3D.asString => Gene3D - case GeneCards.asString => GeneCards - case GeneDB.asString => GeneDB - case GeneID.asString => GeneID - case GeneReviews.asString => GeneReviews - case GeneWiki.asString => GeneWiki - case GenomeRNAi.asString => GenomeRNAi - case GeneTree.asString => GeneTree - case Genevisible.asString => Genevisible - case GO.asString => GO - case Gramene.asString => Gramene - case GuidetoPHARMACOLOGY.asString => GuidetoPHARMACOLOGY - case HGNC.asString => HGNC - case `H-InvDB`.asString => `H-InvDB` - case HAMAP.asString => HAMAP - case HOGENOM.asString => HOGENOM - case HOVERGEN.asString => HOVERGEN - case 
HPA.asString => HPA - case InParanoid.asString => InParanoid - case IntAct.asString => IntAct - case InterPro.asString => InterPro - case IPI.asString => IPI - case iPTMnet.asString => iPTMnet - case KEGG.asString => KEGG - case KO.asString => KO - case LegioList.asString => LegioList - case Leproma.asString => Leproma - case MaizeGDB.asString => MaizeGDB - case MalaCards.asString => MalaCards - case MaxQB.asString => MaxQB - case MEROPS.asString => MEROPS - case MGI.asString => MGI - case MIM.asString => MIM - case MINT.asString => MINT - case mycoCLAP.asString => mycoCLAP - case neXtProt.asString => neXtProt - case OGP.asString => OGP - case OMA.asString => OMA - case Orphanet.asString => Orphanet - case OrthoDB.asString => OrthoDB - case PANTHER.asString => PANTHER - case PATRIC.asString => PATRIC - case PaxDb.asString => PaxDb - case PDB.asString => PDB - case PDBsum.asString => PDBsum - case PeptideAtlas.asString => PeptideAtlas - case PeroxiBase.asString => PeroxiBase - case Pfam.asString => Pfam - case PharmGKB.asString => PharmGKB - case PhosphoSite.asString => PhosphoSite - case PhylomeDB.asString => PhylomeDB - case PIR.asString => PIR - case PIRSF.asString => PIRSF - case `PMAP-CutDB`.asString => `PMAP-CutDB` - case PomBase.asString => PomBase - case PRIDE.asString => PRIDE - case PRINTS.asString => PRINTS - case ProDom.asString => ProDom - case PRO.asString => PRO - case ProMEX.asString => ProMEX - case PROSITE.asString => PROSITE - case ProteinModelPortal.asString => ProteinModelPortal - case PseudoCAP.asString => PseudoCAP - case Reactome.asString => Reactome - case REBASE.asString => REBASE - case RefSeq.asString => RefSeq - case `REPRODUCTION-2DPAGE`.asString => `REPRODUCTION-2DPAGE` - case RGD.asString => RGD - case `SABIO-RK`.asString => `SABIO-RK` - case SGD.asString => SGD - case SignaLink.asString => SignaLink - case SIGNOR.asString => SIGNOR - case SMART.asString => SMART - case SMR.asString => SMR - case STRING.asString => STRING - case 
SUPFAM.asString => SUPFAM - case SWISS.asString => SWISS - case SwissLipids.asString => SwissLipids - case SwissPalm.asString => SwissPalm - case TAIR.asString => TAIR - case TCDB.asString => TCDB - case TIGRFAMs.asString => TIGRFAMs - case TopDownProteomics.asString => TopDownProteomics - case TreeFam.asString => TreeFam - case TubercuList.asString => TubercuList - case `UCD-2DPAGE`.asString => `UCD-2DPAGE` - case UniGene.asString => UniGene - case UCSC.asString => UCSC - case UniCarbKB.asString => UniCarbKB - case UniPathway.asString => UniPathway - case VectorBase.asString => VectorBase - case `World-2DPAGE`.asString => `World-2DPAGE` - case WormBase.asString => WormBase - case WBParaSite.asString => WBParaSite - case Xenbase.asString => Xenbase - case ZFIN.asString => ZFIN + case "EMBL" => EMBL + case "Allergome" => Allergome + case "ArachnoServer" => ArachnoServer + case "Bgee" => Bgee + case "BindingDB" => BindingDB + case "BioCyc" => BioCyc + case "BioGrid" => BioGrid + case "BioMuta" => BioMuta + case "BRENDA" => BRENDA + case "CAZy" => CAZy + case "CCDS" => CCDS + case "CDD" => CDD + case "ChEMBL" => ChEMBL + case "ChiTaRS" => ChiTaRS + case "CGD" => CGD + case "CleanEx" => CleanEx + case "COMPLUYEAST-2DPAGE" => `COMPLUYEAST-2DPAGE` + case "CollecTF" => CollecTF + case "ConoServer" => ConoServer + case "CTD" => CTD + case "dictyBase" => dictyBase + case "DEPOD" => DEPOD + case "DIP" => DIP + case "DMDM" => DMDM + case "DNASU" => DNASU + case "DOSAC-COBS-2DPAGE" => `DOSAC-COBS-2DPAGE` + case "DisProt" => DisProt + case "DrugBank" => DrugBank + case "EchoBASE" => EchoBASE + case "EcoGene" => EcoGene + case "eggNOG" => eggNOG + case "Ensembl" => Ensembl + case "EnsemblBacteria" => EnsemblBacteria + case "EnsemblFungi" => EnsemblFungi + case "EnsemblMetazoa" => EnsemblMetazoa + case "EnsemblPlants" => EnsemblPlants + case "EnsemblProtists" => EnsemblProtists + case "EPD" => EPD + case "ESTHER" => ESTHER + case "euHCVdb" => euHCVdb + case "EuPathDB" => EuPathDB 
+ case "EvolutionaryTrace" => EvolutionaryTrace + case "ExpressionAtlas" => ExpressionAtlas + case "FlyBase" => FlyBase + case "Gene3D" => Gene3D + case "GeneCards" => GeneCards + case "GeneDB" => GeneDB + case "GeneID" => GeneID + case "GeneReviews" => GeneReviews + case "GeneWiki" => GeneWiki + case "GenomeRNAi" => GenomeRNAi + case "GeneTree" => GeneTree + case "Genevisible" => Genevisible + case "GO" => GO + case "Gramene" => Gramene + case "GuidetoPHARMACOLOGY" => GuidetoPHARMACOLOGY + case "HGNC" => HGNC + case "H-InvDB" => `H-InvDB` + case "HAMAP" => HAMAP + case "HOGENOM" => HOGENOM + case "HOVERGEN" => HOVERGEN + case "HPA" => HPA + case "InParanoid" => InParanoid + case "IntAct" => IntAct + case "InterPro" => InterPro + case "IPI" => IPI + case "iPTMnet" => iPTMnet + case "KEGG" => KEGG + case "KO" => KO + case "LegioList" => LegioList + case "Leproma" => Leproma + case "MaizeGDB" => MaizeGDB + case "MalaCards" => MalaCards + case "MaxQB" => MaxQB + case "MEROPS" => MEROPS + case "MGI" => MGI + case "MIM" => MIM + case "MINT" => MINT + case "mycoCLAP" => mycoCLAP + case "MoonProt" => MoonProt + case "neXtProt" => neXtProt + case "OGP" => OGP + case "OMA" => OMA + case "Orphanet" => Orphanet + case "OrthoDB" => OrthoDB + case "PANTHER" => PANTHER + case "PATRIC" => PATRIC + case "PaxDb" => PaxDb + case "PDB" => PDB + case "PDBsum" => PDBsum + case "PeptideAtlas" => PeptideAtlas + case "PeroxiBase" => PeroxiBase + case "Pfam" => Pfam + case "PharmGKB" => PharmGKB + case "PhosphoSite" => PhosphoSite + case "PhylomeDB" => PhylomeDB + case "PIR" => PIR + case "PIRSF" => PIRSF + case "PMAP-CutDB" => `PMAP-CutDB` + case "PomBase" => PomBase + case "PRIDE" => PRIDE + case "PRINTS" => PRINTS + case "ProDom" => ProDom + case "PRO" => PRO + case "ProMEX" => ProMEX + case "PROSITE" => PROSITE + case "ProteinModelPortal" => ProteinModelPortal + case "Proteomes" => Proteomes + case "PseudoCAP" => PseudoCAP + case "Reactome" => Reactome + case "REBASE" => REBASE + case 
"RefSeq" => RefSeq + case "REPRODUCTION-2DPAGE" => `REPRODUCTION-2DPAGE` + case "RGD" => RGD + case "SABIO-RK" => `SABIO-RK` + case "SGD" => SGD + case "SignaLink" => SignaLink + case "SIGNOR" => SIGNOR + case "SMART" => SMART + case "SMR" => SMR + case "STRING" => STRING + case "SUPFAM" => SUPFAM + case "SWISS-2DPAGE" => `SWISS-2DPAGE` + case "SwissLipids" => SwissLipids + case "SwissPalm" => SwissPalm + case "TAIR" => TAIR + case "TCDB" => TCDB + case "TIGRFAMs" => TIGRFAMs + case "TopDownProteomics" => TopDownProteomics + case "TreeFam" => TreeFam + case "TubercuList" => TubercuList + case "UCD-2DPAGE" => `UCD-2DPAGE` + case "UniGene" => UniGene + case "UCSC" => UCSC + case "UniCarbKB" => UniCarbKB + case "UniPathway" => UniPathway + case "VectorBase" => VectorBase + case "World-2DPAGE" => `World-2DPAGE` + case "WormBase" => WormBase + case "WBParaSite" => WBParaSite + case "Xenbase" => Xenbase + case "ZFIN" => ZFIN } } ``` @@ -634,6 +641,7 @@ case class Sequence(val value: String) extends AnyVal +[test/scala/LineParsingSpeed.scala]: ../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/AC.scala.md b/docs/src/main/scala/flat/AC.scala.md index a0574f4..e339944 100644 --- a/docs/src/main/scala/flat/AC.scala.md +++ b/docs/src/main/scala/flat/AC.scala.md @@ -32,6 +32,7 @@ case class AC(val lines: Seq[String]) extends AnyVal { +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/CC.scala.md b/docs/src/main/scala/flat/CC.scala.md index beb2ecb..81c2a7d 100644 --- 
a/docs/src/main/scala/flat/CC.scala.md +++ b/docs/src/main/scala/flat/CC.scala.md @@ -17,46 +17,46 @@ case class CC(val lines: Seq[String]) { topic match { - case "ALLERGEN" => Vector( Allergen(contents.mkString(" ")) ) + case "ALLERGEN" => List( Allergen(contents.mkString(" ")) ) case "ALTERNATIVE PRODUCTS" => isoformBlocks(contents.tail) map isoformFromBlock - case "BIOPHYSICOCHEMICAL PROPERTIES" => Vector( BiophysicochemicalProperties(contents.mkString(" ")) ) - case "BIOTECHNOLOGY" => Vector( Biotechnology(contents.mkString(" ")) ) - case "CATALYTIC ACTIVITY" => Vector( CatalyticActivity(contents.mkString(" ")) ) - case "CAUTION" => Vector( Caution(contents.mkString(" ")) ) - case "COFACTOR" => Vector( Cofactor(contents.mkString(" ")) ) - case "DEVELOPMENTAL STAGE" => Vector( DevelopmentalStage(contents.mkString(" ")) ) - case "DISEASE" => Vector( Disease(contents.mkString(" ")) ) - case "DISRUPTION PHENOTYPE" => Vector( DisruptionPhenotype(contents.mkString(" ")) ) - case "DOMAIN" => Vector( Domain(contents.mkString(" ")) ) - case "ENZYME REGULATION" => Vector( EnzymeRegulation(contents.mkString(" ")) ) - case "FUNCTION" => Vector( Function(contents.mkString(" ")) ) - case "INDUCTION" => Vector( Induction(contents.mkString(" ")) ) - case "INTERACTION" => Vector( Interaction(contents.mkString(" ")) ) - case "MASS SPECTROMETRY" => Vector( MassSpectrometry(contents.mkString(" ")) ) - case "MISCELLANEOUS" => Vector( Miscellaneous(contents.mkString(" ")) ) - case "PATHWAY" => Vector( Pathway(contents.mkString(" ")) ) - case "PHARMACEUTICAL" => Vector( Pharmaceutical(contents.mkString(" ")) ) - case "POLYMORPHISM" => Vector( Polymorphism(contents.mkString(" ")) ) - case "PTM" => Vector( PTM(contents.mkString(" ")) ) - case "RNA EDITING" => Vector( RNAEditing(contents.mkString(" ")) ) - case "SEQUENCE CAUTION" => Vector( SequenceCaution(contents.mkString(" ")) ) - case "SIMILARITY" => Vector( Similarity(contents.mkString(" ")) ) - case "SUBCELLULAR LOCATION" => 
Vector( SubcellularLocation(contents.mkString(" ")) ) - case "SUBUNIT" => Vector( Subunit(contents.mkString(" ")) ) - case "TISSUE SPECIFICITY" => Vector( TissueSpecificity(contents.mkString(" ")) ) - case "TOXIC DOSE" => Vector( ToxicDose(contents.mkString(" ")) ) - case "WEB RESOURCE" => Vector( WebResource(contents.mkString(" ")) ) + case "BIOPHYSICOCHEMICAL PROPERTIES" => List( BiophysicochemicalProperties(contents.mkString(" ")) ) + case "BIOTECHNOLOGY" => List( Biotechnology(contents.mkString(" ")) ) + case "CATALYTIC ACTIVITY" => List( CatalyticActivity(contents.mkString(" ")) ) + case "CAUTION" => List( Caution(contents.mkString(" ")) ) + case "COFACTOR" => List( Cofactor(contents.mkString(" ")) ) + case "DEVELOPMENTAL STAGE" => List( DevelopmentalStage(contents.mkString(" ")) ) + case "DISEASE" => List( Disease(contents.mkString(" ")) ) + case "DISRUPTION PHENOTYPE" => List( DisruptionPhenotype(contents.mkString(" ")) ) + case "DOMAIN" => List( Domain(contents.mkString(" ")) ) + case "ENZYME REGULATION" => List( EnzymeRegulation(contents.mkString(" ")) ) + case "FUNCTION" => List( Function(contents.mkString(" ")) ) + case "INDUCTION" => List( Induction(contents.mkString(" ")) ) + case "INTERACTION" => List( Interaction(contents.mkString(" ")) ) + case "MASS SPECTROMETRY" => List( MassSpectrometry(contents.mkString(" ")) ) + case "MISCELLANEOUS" => List( Miscellaneous(contents.mkString(" ")) ) + case "PATHWAY" => List( Pathway(contents.mkString(" ")) ) + case "PHARMACEUTICAL" => List( Pharmaceutical(contents.mkString(" ")) ) + case "POLYMORPHISM" => List( Polymorphism(contents.mkString(" ")) ) + case "PTM" => List( PTM(contents.mkString(" ")) ) + case "RNA EDITING" => List( RNAEditing(contents.mkString(" ")) ) + case "SEQUENCE CAUTION" => List( SequenceCaution(contents.mkString(" ")) ) + case "SIMILARITY" => List( Similarity(contents.mkString(" ")) ) + case "SUBCELLULAR LOCATION" => List( SubcellularLocation(contents.mkString(" ")) ) + case "SUBUNIT" => 
List( Subunit(contents.mkString(" ")) ) + case "TISSUE SPECIFICITY" => List( TissueSpecificity(contents.mkString(" ")) ) + case "TOXIC DOSE" => List( ToxicDose(contents.mkString(" ")) ) + case "WEB RESOURCE" => List( WebResource(contents.mkString(" ")) ) } } private def commentBlocks(commentLines: Seq[String]): Seq[Seq[String]] = - commentLines.foldLeft[Seq[Seq[String]]](Vector()){ (acc: Seq[Seq[String]], line: String) => + commentLines.foldLeft[collection.mutable.Buffer[Seq[String]]](new collection.mutable.UnrolledBuffer[Seq[String]]){ (acc: collection.mutable.Buffer[Seq[String]], line: String) => // extra lines for a comment if(line startsWith " ") { acc.updated(acc.length - 1, acc.last :+ line.trim) } else { - acc :+ Vector(line.stripPrefix("-!-").trim) + acc += List(line.stripPrefix("-!-").trim) } } ``` @@ -87,12 +87,12 @@ The input here has lines **already trimmed**. private def isoformBlocks(altProdLines: Seq[String]): Seq[Seq[String]] = altProdLines .dropWhile(altProdLine => !altProdLine.startsWith("Name=")) - .foldLeft(Seq[Seq[String]]()){ (acc: Seq[Seq[String]], line: String) => + .foldLeft(new collection.mutable.UnrolledBuffer[Seq[String]]()){ (acc: collection.mutable.UnrolledBuffer[Seq[String]], line: String) => // same iso if(!(line startsWith "Name=")) acc.updated(acc.length - 1, acc.last :+ line.trim) else - acc :+ Vector(line.trim) + acc += List(line.trim) } ``` @@ -123,6 +123,7 @@ IsoId=Q15746-3; Sequence=VSP_004792, VSP_004794; +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/DE.scala.md b/docs/src/main/scala/flat/DE.scala.md index 5ac3c86..4834ed6 100644 --- a/docs/src/main/scala/flat/DE.scala.md +++ b/docs/src/main/scala/flat/DE.scala.md @@ -163,6 +163,7 @@ case object DE { 
+[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/DR.scala.md b/docs/src/main/scala/flat/DR.scala.md index 3c931fe..36d4cb8 100644 --- a/docs/src/main/scala/flat/DR.scala.md +++ b/docs/src/main/scala/flat/DR.scala.md @@ -20,19 +20,19 @@ Bgee; ENSMUSG00000032315; -. ```scala case class DR(val lines: Seq[String]) extends AnyVal { + @inline def databaseCrossReferences: Seq[DatabaseCrossReference] = lines map { line => - val fragments = line.splitSegments(_==';') - val resourceAbbrv = fragments(0).trim - val id = fragments(1).trim + val (firstFrag, rest1) = line.span(_!=';') + val (secondFrag, rest2) = rest1.stripPrefix(";").span(_!=';') DatabaseCrossReference( - resource = ResourceAbbreviation.fromString(resourceAbbrv), - identifier = id, + resource = ResourceAbbreviation.fromString(firstFrag.trim), + identifier = secondFrag.trim // TODO other info? 
- otherInformation = None, - isoformID = None + // otherInformation = None, + // isoformID = None ) } } @@ -42,6 +42,7 @@ case class DR(val lines: Seq[String]) extends AnyVal { +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/DT.scala.md b/docs/src/main/scala/flat/DT.scala.md index 815863a..ae66cb8 100644 --- a/docs/src/main/scala/flat/DT.scala.md +++ b/docs/src/main/scala/flat/DT.scala.md @@ -65,6 +65,7 @@ case class DT(val value: Seq[String]) { +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/Entry.scala.md b/docs/src/main/scala/flat/Entry.scala.md index 2e08d6e..6c928cb 100644 --- a/docs/src/main/scala/flat/Entry.scala.md +++ b/docs/src/main/scala/flat/Entry.scala.md @@ -9,16 +9,16 @@ import java.time.LocalDate case class Entry( val allLines: Seq[String] ) -extends AnyEntry { +extends AnyVal with AnyEntry { - private lazy val id: ID = + final def id: ID = ID(linesOfType(LineType.ID).head) @inline final def identification: Identification = id.identification - private lazy val ac: AC = + final def ac: AC = AC(linesOfType(LineType.AC)) @inline @@ -33,21 +33,21 @@ extends AnyEntry { ) } - private lazy val dt: DT = + final def dt: DT = DT(linesOfType(LineType.DT)) @inline final def date: Date = dt.date - private lazy val de: DE = + final def de: DE = DE(linesOfType(LineType.DE)) @inline final def description: Description = de.description - private lazy val gn: GN = + final def gn: GN = GN(linesOfType(LineType.GN)) @inline 
@@ -58,87 +58,92 @@ extends AnyEntry { final def organismSpecies: OrganismSpecies = ??? - private lazy val og: OG = - OG(linesOfType(LineType.GN)) + final def og: OG = + OG(linesOfType(LineType.OG)) - lazy val organelles: Seq[Organelle] = + @inline + final def organelles: Seq[Organelle] = og.organelles @inline final def organismClassification: OrganismClassification = ??? - private lazy val ox: OX = + final def ox: OX = OX(linesOfType(LineType.OX).head) @inline final def taxonomyCrossReference: TaxonomyCrossReference = ox.taxonomyCrossReference - private lazy val oh: OH = + final def oh: OH = OH(linesOfType(LineType.OH)) @inline final def organismHost: Seq[TaxonomyCrossReference] = oh.taxonomyCrossReferences - private lazy val cc: CC = + final def cc: CC = CC(linesOfType(LineType.CC)) @inline final def comments: Seq[Comment] = cc.comments - private lazy val dr: DR = - DR(linesOfType(LineType.DR)) + final def dr: DR = + DR(linesOfType(LineType.DR).toList) @inline final def databaseCrossReferences: Seq[DatabaseCrossReference] = dr.databaseCrossReferences - private lazy val pe: PE = + final def pe: PE = PE(linesOfType(LineType.PE).head) @inline final def proteinExistence: ProteinExistence = pe.proteinExistence - private lazy val kw: KW = + final def kw: KW = KW(linesOfType(LineType.KW)) @inline final def keywords: Seq[Keyword] = kw.keywords - private lazy val ft: FT = + final def ft: FT = FT(linesOfType(LineType.FT)) @inline final def features: Seq[Feature] = ft.features - private lazy val sq: SQ = + final def sq: SQ = SQ(linesOfType(LineType.SQ).head) @inline final def sequenceHeader: SequenceHeader = sq.sequenceHeader - private lazy val sd: SequenceData = + final def sd: SequenceData = SequenceData(linesOfType(LineType.` `)) @inline final def sequence: Sequence = sd.sequence - - - private def linesOfType(lt: LineType) = - (allLines filter Line.isOfType(lt)).map(Line.contentOf) + @inline + final def linesOfType(lt: LineType) = + allLines + .dropWhile(l => !(l 
startsWith lt.asString)) + .takeWhile(_ startsWith lt.asString) + .map(_ drop 5) + // allLines collect { case l if(l startsWith lt.toString) => l drop 5 } } case object Entry { + @inline def from(lns: Seq[String]): Entry = Entry(lns) } @@ -148,6 +153,7 @@ case object Entry { +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/FT.scala.md b/docs/src/main/scala/flat/FT.scala.md index 2eb2a2c..01b88e6 100644 --- a/docs/src/main/scala/flat/FT.scala.md +++ b/docs/src/main/scala/flat/FT.scala.md @@ -55,6 +55,7 @@ case class FT(val lines: Seq[String]) extends AnyVal { +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/GN.scala.md b/docs/src/main/scala/flat/GN.scala.md index 96c8121..28539d9 100644 --- a/docs/src/main/scala/flat/GN.scala.md +++ b/docs/src/main/scala/flat/GN.scala.md @@ -93,6 +93,7 @@ case object GN { +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/ID.scala.md b/docs/src/main/scala/flat/ID.scala.md index 20b94f5..46805b3 100644 --- a/docs/src/main/scala/flat/ID.scala.md +++ b/docs/src/main/scala/flat/ID.scala.md @@ -53,6 +53,7 @@ case class ID(val value: String) extends AnyVal { +[test/scala/LineParsingSpeed.scala]: 
../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/KW.scala.md b/docs/src/main/scala/flat/KW.scala.md index ba63040..a2fd39f 100644 --- a/docs/src/main/scala/flat/KW.scala.md +++ b/docs/src/main/scala/flat/KW.scala.md @@ -16,6 +16,7 @@ case class KW(val lines: Seq[String]) extends AnyVal { +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/OG.scala.md b/docs/src/main/scala/flat/OG.scala.md index 4e640bf..8e84b91 100644 --- a/docs/src/main/scala/flat/OG.scala.md +++ b/docs/src/main/scala/flat/OG.scala.md @@ -14,29 +14,29 @@ case class OG(val lines: Seq[String]) extends AnyVal { case object OG { def organellesFromLines(reps: Seq[String]): Seq[Organelle] = - reps.head match { + if(reps.isEmpty) Seq() else + reps.head match { - case "Hydrogenosome." => Seq(Hydrogenosome) - case "Mitochondrion." => Seq(Mitochondrion) - case "Nucleomorph." => Seq(Nucleomorph) - case "Plastid." => Seq(Plastid) - case x if(x startsWith "Plastid;") => - (x stripPrefix "Plastid;").trim match { - case "Apicoplast." => Seq(Apicoplast) - case "Chloroplast." => Seq(Chloroplast) - case "Organellar chromatophore." => Seq(OrganellarChromatophore) - case "Cyanelle." => Seq(Cyanelle) - case "Non-photosynthetic plastid." 
=> Seq(NonPhotosyntheticPlastid) - } - case y if(y startsWith "Plasmid") => - reps - .map(p => p.stripSuffix(" and")) - .flatMap { - _.splitSegments(_ == ',') - .map{ frgmt => Plasmid(frgmt.trim.stripPrefix("Plasmid").trim.stripSuffix(".")) } + case x if (x startsWith "Hydrogenosome") => Seq(Hydrogenosome) + case x if (x startsWith "Mitochondrion") => Seq(Mitochondrion) + case x if (x startsWith "Nucleomorph") => Seq(Nucleomorph) + case x if (x startsWith "Plastid;") => + (x stripPrefix "Plastid;").trim match { + case x if (x startsWith "Apicoplast") => Seq(Apicoplast) + case x if (x startsWith "Chloroplast") => Seq(Chloroplast) + case x if (x startsWith "Organellar chromatophore") => Seq(OrganellarChromatophore) + case x if (x startsWith "Cyanelle") => Seq(Cyanelle) + case x if (x startsWith "Non-photosynthetic plastid") => Seq(NonPhotosyntheticPlastid) } - } - + case x if (x startsWith "Plastid") => Seq(Plastid) + case y if (y startsWith "Plasmid") => + reps + .map(p => p.stripSuffix(" and")) + .flatMap { + _.splitSegments(_ == ',') + .map{ frgmt => Plasmid(frgmt.trim.stripPrefix("Plasmid").trim.stripSuffix(".")) } + } + } } ``` @@ -44,6 +44,7 @@ case object OG { +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/OH.scala.md b/docs/src/main/scala/flat/OH.scala.md index 059d4ab..b219ac4 100644 --- a/docs/src/main/scala/flat/OH.scala.md +++ b/docs/src/main/scala/flat/OH.scala.md @@ -32,6 +32,7 @@ case class OH(val lines: Seq[String]) extends AnyVal { +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: 
../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/OS.scala.md b/docs/src/main/scala/flat/OS.scala.md index 10ad31e..c46e797 100644 --- a/docs/src/main/scala/flat/OS.scala.md +++ b/docs/src/main/scala/flat/OS.scala.md @@ -9,6 +9,7 @@ case class OS(val lines: Seq[String]) extends AnyVal +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/OX.scala.md b/docs/src/main/scala/flat/OX.scala.md index aa9fec1..b76f5a2 100644 --- a/docs/src/main/scala/flat/OX.scala.md +++ b/docs/src/main/scala/flat/OX.scala.md @@ -19,6 +19,7 @@ case class OX(val line: String) extends AnyVal { +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/PE.scala.md b/docs/src/main/scala/flat/PE.scala.md index 23ee688..9b1d5d1 100644 --- a/docs/src/main/scala/flat/PE.scala.md +++ b/docs/src/main/scala/flat/PE.scala.md @@ -21,6 +21,7 @@ case class PE(val line: String) extends AnyVal { +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/RA.scala.md b/docs/src/main/scala/flat/RA.scala.md index 5905b29..f44edf2 100644 --- a/docs/src/main/scala/flat/RA.scala.md +++ b/docs/src/main/scala/flat/RA.scala.md @@ -9,6 +9,7 @@ case class RA(val lines: Seq[String]) extends 
AnyVal +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/RC.scala.md b/docs/src/main/scala/flat/RC.scala.md index 572a89f..82329f8 100644 --- a/docs/src/main/scala/flat/RC.scala.md +++ b/docs/src/main/scala/flat/RC.scala.md @@ -9,6 +9,7 @@ case class RC(val lines: Seq[String]) extends AnyVal +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/RG.scala.md b/docs/src/main/scala/flat/RG.scala.md index 12c52e5..f048c12 100644 --- a/docs/src/main/scala/flat/RG.scala.md +++ b/docs/src/main/scala/flat/RG.scala.md @@ -9,6 +9,7 @@ case class RG(val lines: Seq[String]) extends AnyVal +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/RL.scala.md b/docs/src/main/scala/flat/RL.scala.md index ce7b6db..f53cb6e 100644 --- a/docs/src/main/scala/flat/RL.scala.md +++ b/docs/src/main/scala/flat/RL.scala.md @@ -9,6 +9,7 @@ case class RL(val lines: Seq[String]) extends AnyVal +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git 
a/docs/src/main/scala/flat/RN.scala.md b/docs/src/main/scala/flat/RN.scala.md index bbde914..249eab7 100644 --- a/docs/src/main/scala/flat/RN.scala.md +++ b/docs/src/main/scala/flat/RN.scala.md @@ -9,6 +9,7 @@ case class RN(val lines: Seq[String]) extends AnyVal +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/RP.scala.md b/docs/src/main/scala/flat/RP.scala.md index 1169a4a..a28f22a 100644 --- a/docs/src/main/scala/flat/RP.scala.md +++ b/docs/src/main/scala/flat/RP.scala.md @@ -9,6 +9,7 @@ case class RP(val lines: Seq[String]) extends AnyVal +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/RT.scala.md b/docs/src/main/scala/flat/RT.scala.md index 5848832..94fbfcb 100644 --- a/docs/src/main/scala/flat/RT.scala.md +++ b/docs/src/main/scala/flat/RT.scala.md @@ -9,6 +9,7 @@ case class RT(val lines: Seq[String]) extends AnyVal +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/RX.scala.md b/docs/src/main/scala/flat/RX.scala.md index 237c9fa..41898a5 100644 --- a/docs/src/main/scala/flat/RX.scala.md +++ b/docs/src/main/scala/flat/RX.scala.md @@ -9,6 +9,7 @@ case class RX(val lines: Seq[String]) extends AnyVal +[test/scala/LineParsingSpeed.scala]: 
../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/SQ.scala.md b/docs/src/main/scala/flat/SQ.scala.md index 628896e..b33c38f 100644 --- a/docs/src/main/scala/flat/SQ.scala.md +++ b/docs/src/main/scala/flat/SQ.scala.md @@ -9,23 +9,25 @@ case class SQ(val line: String) extends AnyVal { def sequenceHeader: SequenceHeader = { - val fragments = line.splitSegments(_==';') + // val fragments = line.splitSegments(_==';') + + val (frg0, rest0) = line.stripPrefix("SEQUENCE").dropWhile(_==' ').span(_!=';') + val (frg1, rest1) = rest0.stripPrefix(";").span(_!=';') + val (frg2, rest2) = rest1.stripPrefix(";").span(_!=';') val l = - fragments(0) - .stripPrefix("SEQUENCE") - .dropWhile(_==' ') + frg0 .takeWhile(_!=' ') .toInt val mw = - fragments(1) + frg1 .dropWhile(_==' ') .takeWhile(_!=' ') .toInt val crc = - fragments(2) + frg2 .dropWhile(_==' ') .takeWhile(_!=' ') @@ -42,6 +44,7 @@ case class SQ(val line: String) extends AnyVal { +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/SequenceData.scala.md b/docs/src/main/scala/flat/SequenceData.scala.md index 2710eef..82e5dcc 100644 --- a/docs/src/main/scala/flat/SequenceData.scala.md +++ b/docs/src/main/scala/flat/SequenceData.scala.md @@ -7,11 +7,9 @@ import bio4j.data.uniprot.seqOps._ case class SequenceData(val lines: Seq[String]) extends AnyVal { - def sequence: Sequence = - Sequence( (lines map lineToSequence).mkString("") ) - - private def lineToSequence(line: String): String = - line.splitSegments(_==' 
').mkString("") + @inline + final def sequence: Sequence = + Sequence( lines.mkString("").filter(_ != ' ') ) } ``` @@ -19,6 +17,7 @@ case class SequenceData(val lines: Seq[String]) extends AnyVal { +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/lineTypes.scala.md b/docs/src/main/scala/flat/lineTypes.scala.md index 2e65a54..8ae1518 100644 --- a/docs/src/main/scala/flat/lineTypes.scala.md +++ b/docs/src/main/scala/flat/lineTypes.scala.md @@ -71,6 +71,7 @@ case object Line { +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/flat/parsers.scala.md b/docs/src/main/scala/flat/parsers.scala.md index f53609c..61c7a1d 100644 --- a/docs/src/main/scala/flat/parsers.scala.md +++ b/docs/src/main/scala/flat/parsers.scala.md @@ -18,18 +18,18 @@ case object parsers { entry @annotation.tailrec - private def entry_rec(acc: Vector[String]): Vector[String] = + private def entry_rec(acc: collection.mutable.Buffer[String]): Array[String] = if (rest.hasNext) { if( rest.head.startsWith("//") ) { rest.next() - acc + acc.toArray } - else entry_rec(acc :+ rest.next()) + else entry_rec(acc += rest.next()) } - else acc + else acc.toArray - private def entry: Seq[String] = entry_rec(Vector()) + private def entry: Seq[String] = entry_rec(new collection.mutable.UnrolledBuffer()) } // see http://stackoverflow.com/a/33521793/614394 @@ -48,6 +48,7 @@ case object parsers { +[test/scala/LineParsingSpeed.scala]: ../../../test/scala/LineParsingSpeed.scala.md 
[test/scala/lines.scala]: ../../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/main/scala/seqOps.scala.md b/docs/src/main/scala/seqOps.scala.md index 573066d..699bdc9 100644 --- a/docs/src/main/scala/seqOps.scala.md +++ b/docs/src/main/scala/seqOps.scala.md @@ -13,17 +13,17 @@ Splits `s` into segments according to `pred`: those intervals at which `pred` is ```scala def splitSegments(pred: A => Boolean): Seq[Seq[A]] = - splitSegments_rec(s, Seq(), pred) + splitSegments_rec(s, new collection.mutable.UnrolledBuffer[Seq[A]](), pred) @annotation.tailrec - private def splitSegments_rec[X](xs: Seq[X], acc: Seq[Seq[X]], pred: X => Boolean): Seq[Seq[X]] = { + private def splitSegments_rec[X](xs: Seq[X], acc: collection.mutable.UnrolledBuffer[Seq[X]], pred: X => Boolean): Seq[Seq[X]] = { val (segment, rest) = xs span { x => (!pred(x)) } - val nextAcc = if(segment.isEmpty) acc else acc :+ segment + val nextAcc = if(segment.isEmpty) acc else acc += segment if(rest.isEmpty) - nextAcc + nextAcc.toVector else splitSegments_rec( xs = rest dropWhile { x => pred(x) }, @@ -37,17 +37,17 @@ Splits `s` into segments according to `pred`: those intervals at which `pred` is implicit class StringOps(val str: String) extends AnyVal { def splitSegments(pred: Char => Boolean): Seq[String] = - splitSegments_rec(str, Seq(), pred) + splitSegments_rec(str, collection.mutable.UnrolledBuffer(), pred) @annotation.tailrec - private def splitSegments_rec(xs: String, acc: Seq[String], pred: Char => Boolean): Seq[String] = { + private def splitSegments_rec(xs: String, acc: collection.mutable.Buffer[String], pred: Char => Boolean): Seq[String] = { val (segment, rest) = xs span { x => (!pred(x)) } - val nextAcc = if(segment.isEmpty) acc else acc :+segment + val nextAcc = if(segment.isEmpty) acc else acc += segment if(rest.isEmpty) - nextAcc + nextAcc.toVector 
else splitSegments_rec( xs = rest dropWhile { x => pred(x) }, @@ -63,6 +63,7 @@ Splits `s` into segments according to `pred`: those intervals at which `pred` is +[test/scala/LineParsingSpeed.scala]: ../../test/scala/LineParsingSpeed.scala.md [test/scala/lines.scala]: ../../test/scala/lines.scala.md [test/scala/testData.scala]: ../../test/scala/testData.scala.md [test/scala/FlatFileEntry.scala]: ../../test/scala/FlatFileEntry.scala.md diff --git a/docs/src/test/scala/EntryParsingSpeed.scala.md b/docs/src/test/scala/EntryParsingSpeed.scala.md index eb37df7..7e50827 100644 --- a/docs/src/test/scala/EntryParsingSpeed.scala.md +++ b/docs/src/test/scala/EntryParsingSpeed.scala.md @@ -3,52 +3,39 @@ package bio4j.data.uniprot.test import org.scalatest.FunSuite +import org.scalatest.concurrent.TimeLimitedTests +import org.scalatest.time.SpanSugar._ import bio4j.test.ReleaseOnlyTest import bio4j.data.uniprot._ import java.time.LocalDate -class EntryParsingSpeed extends FunSuite { +class EntryParsingSpeed extends FunSuite with TimeLimitedTests { - // more or less the same as the raw read speed - test("split whole SwissProt into entry lines", ReleaseOnlyTest) { + def timeLimit = 100 seconds - flat.parsers.entries(testData.swissProtLines).foreach { e => () } - } - - // more or less the same as the raw read speed; everything's lazy here - test("parse whole SwissProt", ReleaseOnlyTest) { - - flat.parsers.entries(testData.swissProtLines).map(flat.Entry.from).foreach { e => () } - } - - // ~26s - test("parse whole SwissProt, access some data", ReleaseOnlyTest) { + import testData.entries - flat.parsers.entries(testData.swissProtLines).map(flat.Entry.from).foreach { e => + test("SwissProt all entry fields", ReleaseOnlyTest) { - val z = e.accessionNumbers.primary - val u = e.date.creation - val v = e.identification.status + entries.foreach { e => - e.description.recommendedName.foreach { n => if(n.full.isEmpty) println("empty full name!!") } + val id = e.identification; + val ac = 
e.accessionNumbers; + val dt = e.date + val de = e.description + val gn = e.geneNames + val og = e.organelles + val ox = e.taxonomyCrossReference + val oh = e.organismHost + val cc = e.comments + val dr = e.databaseCrossReferences + val pe = e.proteinExistence + val kw = e.keywords + val ft = e.features + val sq = e.sequenceHeader + val sd = e.sequence } } - - // ~15s - test ("All SwissProt entries have a full name", ReleaseOnlyTest) { - - val noOfEntries = 551987 - - val fullNameCount = - flat.parsers.entries(testData.swissProtLines) - .map(flat.Entry.from) - .foldLeft(0){ (acc, e) => - - acc + e.description.recommendedName.fold(0){_ => 1} - } - - assert { fullNameCount == noOfEntries } - } } ``` @@ -56,6 +43,7 @@ class EntryParsingSpeed extends FunSuite { +[test/scala/LineParsingSpeed.scala]: LineParsingSpeed.scala.md [test/scala/lines.scala]: lines.scala.md [test/scala/testData.scala]: testData.scala.md [test/scala/FlatFileEntry.scala]: FlatFileEntry.scala.md diff --git a/docs/src/test/scala/FileReadSpeed.scala.md b/docs/src/test/scala/FileReadSpeed.scala.md index 1a34e70..e8d0d73 100644 --- a/docs/src/test/scala/FileReadSpeed.scala.md +++ b/docs/src/test/scala/FileReadSpeed.scala.md @@ -21,6 +21,7 @@ class FileReadSpeed extends FunSuite { +[test/scala/LineParsingSpeed.scala]: LineParsingSpeed.scala.md [test/scala/lines.scala]: lines.scala.md [test/scala/testData.scala]: testData.scala.md [test/scala/FlatFileEntry.scala]: FlatFileEntry.scala.md diff --git a/docs/src/test/scala/FlatFileEntry.scala.md b/docs/src/test/scala/FlatFileEntry.scala.md index 40ee2cd..d356d0f 100644 --- a/docs/src/test/scala/FlatFileEntry.scala.md +++ b/docs/src/test/scala/FlatFileEntry.scala.md @@ -48,6 +48,7 @@ class FlatFileEntry extends FunSuite { +[test/scala/LineParsingSpeed.scala]: LineParsingSpeed.scala.md [test/scala/lines.scala]: lines.scala.md [test/scala/testData.scala]: testData.scala.md [test/scala/FlatFileEntry.scala]: FlatFileEntry.scala.md diff --git 
a/docs/src/test/scala/LineParsingSpeed.scala.md b/docs/src/test/scala/LineParsingSpeed.scala.md new file mode 100644 index 0000000..711a3e4 --- /dev/null +++ b/docs/src/test/scala/LineParsingSpeed.scala.md @@ -0,0 +1,75 @@ + +```scala +package bio4j.data.uniprot.test + +import org.scalatest.FunSuite +import org.scalatest.concurrent.TimeLimitedTests +import org.scalatest.time.SpanSugar._ +import bio4j.test.ReleaseOnlyTest +import bio4j.data.uniprot._ +import java.time.LocalDate + +class LinesParsingSpeed extends FunSuite with TimeLimitedTests { + + def timeLimit = 30 seconds + + import testData.entries + + test("SwissProt ID", ReleaseOnlyTest) { entries.foreach { e => val id = e.identification; } } + test("SwissProt AC", ReleaseOnlyTest) { entries.foreach { e => val ac = e.accessionNumbers; } } + test("SwissProt DT", ReleaseOnlyTest) { entries.foreach { e => val dt = e.date } } + test("SwissProt DE", ReleaseOnlyTest) { entries.foreach { e => val de = e.description } } + test("SwissProt GN", ReleaseOnlyTest) { entries.foreach { e => val gn = e.geneNames } } + test("SwissProt OG", ReleaseOnlyTest) { entries.foreach { e => val og = e.organelles } } + test("SwissProt OX", ReleaseOnlyTest) { entries.foreach { e => val ox = e.taxonomyCrossReference } } + test("SwissProt OH", ReleaseOnlyTest) { entries.foreach { e => val oh = e.organismHost } } + test("SwissProt CC", ReleaseOnlyTest) { entries.foreach { e => val cc = e.comments } } + test("SwissProt DR", ReleaseOnlyTest) { entries.foreach { e => val dr = e.databaseCrossReferences } } + test("SwissProt PE", ReleaseOnlyTest) { entries.foreach { e => val pe = e.proteinExistence } } + test("SwissProt KW", ReleaseOnlyTest) { entries.foreach { e => val kw = e.keywords } } + test("SwissProt FT", ReleaseOnlyTest) { entries.foreach { e => val ft = e.features } } + test("SwissProt SQ", ReleaseOnlyTest) { entries.foreach { e => val sq = e.sequenceHeader } } + test("SwissProt --", ReleaseOnlyTest) { entries.foreach { e => val x = 
e.sequence } } +} + +``` + + + + +[test/scala/LineParsingSpeed.scala]: LineParsingSpeed.scala.md +[test/scala/lines.scala]: lines.scala.md +[test/scala/testData.scala]: testData.scala.md +[test/scala/FlatFileEntry.scala]: FlatFileEntry.scala.md +[test/scala/EntryParsingSpeed.scala]: EntryParsingSpeed.scala.md +[test/scala/FileReadSpeed.scala]: FileReadSpeed.scala.md +[test/scala/SeqOps.scala]: SeqOps.scala.md +[main/scala/entry.scala]: ../../main/scala/entry.scala.md +[main/scala/flat/SequenceData.scala]: ../../main/scala/flat/SequenceData.scala.md +[main/scala/flat/KW.scala]: ../../main/scala/flat/KW.scala.md +[main/scala/flat/ID.scala]: ../../main/scala/flat/ID.scala.md +[main/scala/flat/RC.scala]: ../../main/scala/flat/RC.scala.md +[main/scala/flat/DT.scala]: ../../main/scala/flat/DT.scala.md +[main/scala/flat/Entry.scala]: ../../main/scala/flat/Entry.scala.md +[main/scala/flat/GN.scala]: ../../main/scala/flat/GN.scala.md +[main/scala/flat/parsers.scala]: ../../main/scala/flat/parsers.scala.md +[main/scala/flat/RG.scala]: ../../main/scala/flat/RG.scala.md +[main/scala/flat/DR.scala]: ../../main/scala/flat/DR.scala.md +[main/scala/flat/OG.scala]: ../../main/scala/flat/OG.scala.md +[main/scala/flat/RL.scala]: ../../main/scala/flat/RL.scala.md +[main/scala/flat/SQ.scala]: ../../main/scala/flat/SQ.scala.md +[main/scala/flat/PE.scala]: ../../main/scala/flat/PE.scala.md +[main/scala/flat/OS.scala]: ../../main/scala/flat/OS.scala.md +[main/scala/flat/CC.scala]: ../../main/scala/flat/CC.scala.md +[main/scala/flat/OX.scala]: ../../main/scala/flat/OX.scala.md +[main/scala/flat/OH.scala]: ../../main/scala/flat/OH.scala.md +[main/scala/flat/RN.scala]: ../../main/scala/flat/RN.scala.md +[main/scala/flat/DE.scala]: ../../main/scala/flat/DE.scala.md +[main/scala/flat/RA.scala]: ../../main/scala/flat/RA.scala.md +[main/scala/flat/RX.scala]: ../../main/scala/flat/RX.scala.md +[main/scala/flat/FT.scala]: ../../main/scala/flat/FT.scala.md +[main/scala/flat/AC.scala]: 
../../main/scala/flat/AC.scala.md +[main/scala/flat/RP.scala]: ../../main/scala/flat/RP.scala.md +[main/scala/flat/lineTypes.scala]: ../../main/scala/flat/lineTypes.scala.md +[main/scala/flat/RT.scala]: ../../main/scala/flat/RT.scala.md +[main/scala/seqOps.scala]: ../../main/scala/seqOps.scala.md \ No newline at end of file diff --git a/docs/src/test/scala/SeqOps.scala.md b/docs/src/test/scala/SeqOps.scala.md index 0da074d..e534604 100644 --- a/docs/src/test/scala/SeqOps.scala.md +++ b/docs/src/test/scala/SeqOps.scala.md @@ -23,6 +23,7 @@ class SeqOps extends FunSuite { +[test/scala/LineParsingSpeed.scala]: LineParsingSpeed.scala.md [test/scala/lines.scala]: lines.scala.md [test/scala/testData.scala]: testData.scala.md [test/scala/FlatFileEntry.scala]: FlatFileEntry.scala.md diff --git a/docs/src/test/scala/lines.scala.md b/docs/src/test/scala/lines.scala.md index c55597a..b076b7c 100644 --- a/docs/src/test/scala/lines.scala.md +++ b/docs/src/test/scala/lines.scala.md @@ -414,6 +414,7 @@ class Lines extends FunSuite { +[test/scala/LineParsingSpeed.scala]: LineParsingSpeed.scala.md [test/scala/lines.scala]: lines.scala.md [test/scala/testData.scala]: testData.scala.md [test/scala/FlatFileEntry.scala]: FlatFileEntry.scala.md diff --git a/docs/src/test/scala/testData.scala.md b/docs/src/test/scala/testData.scala.md index 83dd437..c9c5a88 100644 --- a/docs/src/test/scala/testData.scala.md +++ b/docs/src/test/scala/testData.scala.md @@ -12,6 +12,9 @@ case object testData { def swissProtLines = swissProtFile.getLines + final def entries = + bio4j.data.uniprot.flat.parsers.entries(testData.swissProtLines).map(bio4j.data.uniprot.flat.Entry.from) + lazy val entryLines: Vector[String] = entry.split('\n').dropWhile(_.isEmpty).toVector @@ -172,6 +175,7 @@ SQ SEQUENCE 589 AA; 66839 MW; D4CF69E0E818A988 CRC64; +[test/scala/LineParsingSpeed.scala]: LineParsingSpeed.scala.md [test/scala/lines.scala]: lines.scala.md [test/scala/testData.scala]: testData.scala.md 
[test/scala/FlatFileEntry.scala]: FlatFileEntry.scala.md