Skip to content

Commit

Permalink
Updated test suite to ignore empty message segments, updated build default spark version
Browse files Browse the repository at this point in the history

Signed-off-by: Ryan DeCosmo <54866168+ryandecosmo@users.noreply.github.com>
  • Loading branch information
ryandecosmo committed May 21, 2021
1 parent 0935723 commit cfbbccd
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 6 deletions.
4 changes: 2 additions & 2 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import sbt.nio.Keys._
lazy val scala212 = "2.12.8"
lazy val scala211 = "2.11.12"

lazy val sparkVersion = sys.env.getOrElse("SPARK_VERSION", "3.0.0")
lazy val sparkVersion = sys.env.getOrElse("SPARK_VERSION", "3.0.1")

def majorMinorVersion(version: String): String = {
StringUtils.ordinalIndexOf(version, ".", 2) match {
Expand Down Expand Up @@ -158,7 +158,7 @@ lazy val sparkClasspath = taskKey[String]("sparkClasspath")
lazy val sparkHome = taskKey[String]("sparkHome")

// Publish to Bintray
ThisBuild / description := "An open-source toolkit for large-scale genomic analysis"
ThisBuild / description := "An open-source toolkit for large-scale EHR processing"
ThisBuild / homepage := Some(url("https://databricks.com/solutions/industries/healthcare"))
ThisBuild / scmInfo := Some(
ScmInfo(
Expand Down
5 changes: 4 additions & 1 deletion src/main/scala/com/databricks/labs/smolder/Message.scala
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,16 @@ private[smolder] object Message {
* @return Parses the message into a Message case class.
*/
def apply(text: UTF8String): Message = {

// 0x0d is the ASCII carriage return (CR); the input text is split on this character below.
val delim: Byte = 0x0d

// A null input is passed through as a null Message instead of raising an error.
if (text == null) {
null
} else {
val textString = text.toString
// require throws IllegalArgumentException when the decoded text is empty.
require(textString.nonEmpty, "Received empty string.")

// NOTE(review): this listing is a diff rendered without +/- markers. Given the hunk's
// "4 additions & 1 deletion" summary, the '\n' split below appears to be the removed line
// and the CR split after it its replacement — confirm against the repository.
Message(textString.split('\n').toIterator)
Message(textString.split(delim.toChar).toIterator)
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,10 @@ class MessageSuite extends SmolderBaseTest {

test("parse a full message, by string") {

val delim: Byte = 0x0d

val file = testFile("single_record.hl7")
val lines = Source.fromFile(file).getLines().mkString("\n")
val lines = Source.fromFile(file).getLines().mkString(delim.toChar.toString)

val message = Message(UTF8String.fromString(lines))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ class functionsSuite extends SmolderBaseTest {
.wholeTextFiles(file)
.map(p => TextFile(p._1, p._2)))

val hl7Df = df.select(parse_hl7_message(df("value")).alias("hl7"))
val cleanDF = df.select(regexp_replace(df("value"), "\n", "\r").alias("clean"))

val hl7Df = cleanDF.select(parse_hl7_message(cleanDF("clean")).alias("hl7"))

assert(hl7Df.count() === 1)
assert(hl7Df.selectExpr("explode(hl7.segments)").count() === 3)
Expand Down Expand Up @@ -59,7 +61,8 @@ class functionsSuite extends SmolderBaseTest {
.wholeTextFiles(file)
.map(p => TextFile(p._1, p._2)))

val hl7Df = df.select(parse_hl7_message(df("value")).alias("hl7"))
val cleanDF = df.select(regexp_replace(df("value"), "\n", "\r").alias("clean"))
val hl7Df = cleanDF.select(parse_hl7_message(cleanDF("clean")).alias("hl7"))

val evnType = hl7Df.select(segment_field("EVN", 0, col("hl7.segments"))
.alias("type"))
Expand Down

0 comments on commit cfbbccd

Please sign in to comment.