From 76e21abf96f30675afe75ce3141c2c772dc0b5de Mon Sep 17 00:00:00 2001 From: John Grimes Date: Thu, 11 Jan 2024 16:39:14 +1000 Subject: [PATCH 001/175] Update code examples on Spark installation page --- site/docs/libraries/installation/spark.md | 46 ++++++++++++++++++----- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/site/docs/libraries/installation/spark.md b/site/docs/libraries/installation/spark.md index 38f538319d..3ad8d7edb2 100644 --- a/site/docs/libraries/installation/spark.md +++ b/site/docs/libraries/installation/spark.md @@ -28,22 +28,45 @@ import TabItem from "@theme/TabItem"; ```python -from pathling import PathlingContext, find_jar +from pathling import PathlingContext from pyspark.sql import SparkSession spark = ( - SparkSession.builder - .config("spark.jars", find_jar()) - .config("spark.jars.packages", "io.delta:delta-core_2.12:2.2.0") - .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") - .config("spark.sql.catalog.spark_catalog", - "org.apache.spark.sql.delta.catalog.DeltaCatalog") - .getOrCreate() + SparkSession.builder.config( + "spark.jars.packages", + "au.csiro.pathling:library-runtime:6.4.2," + "io.delta:delta-core_2.12:2.4.0," + ) + .config( + "spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension" + ) + .config( + "spark.sql.catalog.spark_catalog", + "org.apache.spark.sql.delta.catalog.DeltaCatalog", + ) ) pc = PathlingContext.create(spark) ``` + + + +```r +library(sparklyr) +library(pathling) + +sc <- spark_connect(master = "local", + packages = c(paste("au.csiro.pathling:library-runtime:", pathling_version()), + "io.delta:delta-core_2.12:2.4.0"), + config = list("sparklyr.shell.conf" = c( + "spark.sql.extensions=io.delta.sql.DeltaSparkSessionExtension", + "spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog" + )), version = "3.4.0") + +pc <- pathling_connect(sc) +``` + @@ -51,7 +74,8 @@ pc = PathlingContext.create(spark) import 
au.csiro.pathling.library.PathlingContext val spark = SparkSession.builder - .config("spark.jars.packages", "io.delta:delta-core_2.12:2.2.0") + .config("spark.jars.packages", "au.csiro.pathling:library-runtime:6.4.2," + + "io.delta:delta-core_2.12:2.4.0") .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") @@ -71,7 +95,9 @@ class MyApp { public static void main(String[] args) { SparkSession spark = SparkSession.builder() - .config("spark.jars.packages", "io.delta:delta-core_2.12:2.2.0") + .config("spark.jars.packages", + "au.csiro.pathling:library-runtime:6.4.2," + + "io.delta:delta-core_2.12:2.4.0") .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") .config("spark.sql.catalog.spark_catalog", From cef7711fef8690c4bc1c4edd79e3b63ce33a3959 Mon Sep 17 00:00:00 2001 From: John Grimes Date: Wed, 7 Feb 2024 13:48:09 +1000 Subject: [PATCH 002/175] Add schema specification to docs --- .../{encoders.md => encoders/index.md} | 0 site/docs/libraries/encoders/schema.md | 518 ++++++++++++++++++ 2 files changed, 518 insertions(+) rename site/docs/libraries/{encoders.md => encoders/index.md} (100%) create mode 100644 site/docs/libraries/encoders/schema.md diff --git a/site/docs/libraries/encoders.md b/site/docs/libraries/encoders/index.md similarity index 100% rename from site/docs/libraries/encoders.md rename to site/docs/libraries/encoders/index.md diff --git a/site/docs/libraries/encoders/schema.md b/site/docs/libraries/encoders/schema.md new file mode 100644 index 0000000000..2cd29500c2 --- /dev/null +++ b/site/docs/libraries/encoders/schema.md @@ -0,0 +1,518 @@ +# Schema specification + +This specification describes a scheme for encoding resource definitions from the +FHIR specification as a Spark SQL schema. 
+ +The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", " +SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be +interpreted as described in [RFC 2119](https://www.ietf.org/rfc/rfc2119.txt). + +## Version compatibility + +The following versions of FHIR are supported by this specification: + +- [4.0.1 (R4)](https://hl7.org/fhir/R4) + +## Resource type support + +All R4 resource types are supported by this specification, except the following: + +- Parameters +- Task +- StructureDefinition +- StructureMap +- Bundle + +## Configuration options + +There are several options that determine the encoding strategy. Schemas that +have been encoded according to this specification with the same configuration +values SHALL be identical and compatible. + +| Option | Description | +|----------------------------|-----------------------------------------------------------| +| Maximum nesting level | The maximum depth of nested element data that is encoded. | +| Extension encoding enabled | Whether extension content is included within the schema. | +| Supported open types | The list of types that are encoded for open choice types. | + +## Out of scope + +The following areas are not currently supported by this specification: + +- Primitive extensions +- Profiled resources + +## Encoding strategy + +All columns and struct fields SHALL be encoded as nullable, unless otherwise +specified. + +### Primitive elements + +Each primitive element within a resource definition SHALL be encoded as a column +within the schema with the same name (except where otherwise specified). 
The +data type SHALL be determined by the element type according to the following +table: + +| FHIR type | Spark SQL type | Additional requirements | +|-----------------|----------------|--------------------------------------------------------------------------------------------------| +| boolean | boolean | | +| canonical | string | Compliant with the [FHIR canonical format](https://hl7.org/fhir/R4/datatypes.html#canonical) | +| code | string | Compliant with the [FHIR code format](https://hl7.org/fhir/R4/datatypes.html#code) | +| dateTime | string | Compliant with the [FHIR dateTime format](https://hl7.org/fhir/R4/datatypes.html#dateTime) | +| date | string | Compliant with the [FHIR date format](https://hl7.org/fhir/R4/datatypes.html#date) | +| decimal | decimal(32,6) | See [Decimal encoding](#decimal-encoding) | +| id | string | See [ID encoding](#id-encoding) | +| instant | timestamp | | +| integer | integer | Compliant with the [FHIR integer format](https://hl7.org/fhir/R4/datatypes.html#integer) | +| markdown | string | Compliant with the [FHIR markdown format](https://hl7.org/fhir/R4/datatypes.html#markdown) | +| oid | string | Compliant with the [FHIR oid format](https://hl7.org/fhir/R4/datatypes.html#oid) | +| positiveInt | integer | Compliant with the [FHIR positiveInt format](https://hl7.org/fhir/R4/datatypes.html#positiveInt) | +| string | string | Compliant with the [FHIR string format](https://hl7.org/fhir/R4/datatypes.html#string) | +| time | string | Compliant with the [FHIR time format](https://hl7.org/fhir/R4/datatypes.html#time) | +| unsignedInt | integer | Compliant with the [FHIR unsignedInt format](https://hl7.org/fhir/R4/datatypes.html#unsignedInt) | +| uri | string | Compliant with the [FHIR uri format](https://hl7.org/fhir/R4/datatypes.html#uri) | +| url | string | Compliant with the [FHIR url format](https://hl7.org/fhir/R4/datatypes.html#url) | +| uuid | string | Compliant with the [FHIR uuid 
format](https://hl7.org/fhir/R4/datatypes.html#uuid) | + +#### Decimal encoding + +An element of type `decimal` SHALL be encoded as a decimal column with a +precision of 32 and a scale of 6. + +In addition to this column, an integer column SHALL be encoded with the +suffix `_scale`. This column SHALL contain the scale of the decimal value from +the original FHIR data. + +#### ID encoding + +An element of type `id` SHALL be encoded as a string column. This column SHALL +contain the FHIR resource logical ID. + +In addition to this column, a string column SHALL be encoded with the +suffix `_versioned`. This column SHALL contain a fully qualified logical ID, +including the resource type and the technical version. The data in this column +SHALL follow the format `[resource type]/[logical ID]/_history/[version]`. + +### Choice types + +If the choice type is open (i.e. a type of `*`), the struct SHALL contain a +field for each type listed in the configuration value "supported open types". + +If the choice type is not open, it SHALL be encoded as a struct column with a +field for each of its valid types. + +The name of each field SHALL follow the format `value[type]`, where `[type]` is +the name of the type in upper camel case. + +### Complex and backbone elements + +Each complex and backbone element within a resource definition SHALL be encoded +as a struct column within the schema with the same name. The struct SHALL +contain fields for each of the child elements of the complex or backbone +element. + +The encoding rules for each field SHALL follow the same rules as described in +the [primitive elements](#primitive-elements) and [choice types](#choice-types) +section (and this section in the case of nested complex or backbone elements). + +If the "extension encoding enabled" option is set to true, an additional column +SHALL be encoded with the name `_fid`. This column SHALL have a type of integer. 
+This column SHALL contain a value that uniquely identifies the complex or +backbone element within the resource. + +#### Quantity encoding + +If the complex element is of +type [Quantity](https://hl7.org/fhir/R4/datatypes.html#Quantity), an additional +two columns SHALL be encoded as part of the struct. These columns SHALL be +named `_value_canonicalized` and `_code_canonicalized`. + +The `_value_canonicalized` column SHALL be encoded as a struct with the +following fields: + +- `value` with a type of decimal, precision 38 and scale 0. +- `scale` with a type of integer. + +The `_code_canonicalized` column SHALL be encoded as a string column. + +Implementations loading data into this schema MAY use these columns to store +canonicalized versions of the `value` and `code` fields from the original FHIR +data, for easier comparison and querying. + +### Extension encoding + +If the "extension encoding enabled" option is set to true, an additional column +SHALL be encoded at the root of the schema named `_extension`. This column SHALL +have a type of map, with an integer key and an array value. + +The array SHALL contain a struct type that is encoded as +the [Extension](https://hl7.org/fhir/R4/extensibility.html#extension) type, as +described in [Complex and backbone elements](#complex-and-backbone-elements). + +Implementations loading data into this schema SHALL use this column to store +extension data from the original FHIR data (except for primitive extensions). +The map SHALL contain a reference to the `_fid` of the element that the +extension is attached to. 
+ +## Example + +The following schema is an example of the encoding of +the [Patient](https://hl7.org/fhir/R4/patient.html) resource type with the +following configuration values: + +- Maximum nesting level: `3` +- Extension encoding enabled: `true` +- Supported open types: `boolean`, `code`, `date`, `dateTime`, `decimal`, + `integer`, `string`, `Coding`, `CodeableConcept`, `Address`, `Identifier`, + `Reference` + +``` +root + |-- id: string (nullable = true) + |-- id_versioned: string (nullable = true) + |-- meta: struct (nullable = true) + | |-- id: string (nullable = true) + | |-- versionId: string (nullable = true) + | |-- versionId_versioned: string (nullable = true) + | |-- lastUpdated: timestamp (nullable = true) + | |-- source: string (nullable = true) + | |-- profile: array (nullable = true) + | | |-- element: string (containsNull = true) + | |-- security: array (nullable = true) + | | |-- element: struct (containsNull = true) + | | | |-- id: string (nullable = true) + | | | |-- system: string (nullable = true) + | | | |-- version: string (nullable = true) + | | | |-- code: string (nullable = true) + | | | |-- display: string (nullable = true) + | | | |-- userSelected: boolean (nullable = true) + | | | |-- _fid: integer (nullable = true) + | |-- tag: array (nullable = true) + | | |-- element: struct (containsNull = true) + | | | |-- id: string (nullable = true) + | | | |-- system: string (nullable = true) + | | | |-- version: string (nullable = true) + | | | |-- code: string (nullable = true) + | | | |-- display: string (nullable = true) + | | | |-- userSelected: boolean (nullable = true) + | | | |-- _fid: integer (nullable = true) + | |-- _fid: integer (nullable = true) + |-- implicitRules: string (nullable = true) + |-- language: string (nullable = true) + |-- text: struct (nullable = true) + | |-- id: string (nullable = true) + | |-- status: string (nullable = true) + | |-- div: string (nullable = true) + | |-- _fid: integer (nullable = true) + |-- 
identifier: array (nullable = true) + | |-- element: struct (containsNull = true) + | | |-- id: string (nullable = true) + | | |-- use: string (nullable = true) + | | |-- type: struct (nullable = true) + | | | |-- id: string (nullable = true) + | | | |-- coding: array (nullable = true) + | | | | |-- element: struct (containsNull = true) + | | | | | |-- id: string (nullable = true) + | | | | | |-- system: string (nullable = true) + | | | | | |-- version: string (nullable = true) + | | | | | |-- code: string (nullable = true) + | | | | | |-- display: string (nullable = true) + | | | | | |-- userSelected: boolean (nullable = true) + | | | | | |-- _fid: integer (nullable = true) + | | | |-- text: string (nullable = true) + | | | |-- _fid: integer (nullable = true) + | | |-- system: string (nullable = true) + | | |-- value: string (nullable = true) + | | |-- period: struct (nullable = true) + | | | |-- id: string (nullable = true) + | | | |-- start: string (nullable = true) + | | | |-- end: string (nullable = true) + | | | |-- _fid: integer (nullable = true) + | | |-- assigner: struct (nullable = true) + | | | |-- reference: string (nullable = true) + | | | |-- display: string (nullable = true) + | | | |-- _fid: integer (nullable = true) + | | |-- _fid: integer (nullable = true) + |-- active: boolean (nullable = true) + |-- name: array (nullable = true) + | |-- element: struct (containsNull = true) + | | |-- id: string (nullable = true) + | | |-- use: string (nullable = true) + | | |-- text: string (nullable = true) + | | |-- family: string (nullable = true) + | | |-- given: array (nullable = true) + | | | |-- element: string (containsNull = true) + | | |-- prefix: array (nullable = true) + | | | |-- element: string (containsNull = true) + | | |-- suffix: array (nullable = true) + | | | |-- element: string (containsNull = true) + | | |-- period: struct (nullable = true) + | | | |-- id: string (nullable = true) + | | | |-- start: string (nullable = true) + | | | |-- end: 
string (nullable = true) + | | | |-- _fid: integer (nullable = true) + | | |-- _fid: integer (nullable = true) + |-- telecom: array (nullable = true) + | |-- element: struct (containsNull = true) + | | |-- id: string (nullable = true) + | | |-- system: string (nullable = true) + | | |-- value: string (nullable = true) + | | |-- use: string (nullable = true) + | | |-- rank: integer (nullable = true) + | | |-- period: struct (nullable = true) + | | | |-- id: string (nullable = true) + | | | |-- start: string (nullable = true) + | | | |-- end: string (nullable = true) + | | | |-- _fid: integer (nullable = true) + | | |-- _fid: integer (nullable = true) + |-- gender: string (nullable = true) + |-- birthDate: string (nullable = true) + |-- deceasedBoolean: boolean (nullable = true) + |-- deceasedDateTime: string (nullable = true) + |-- address: array (nullable = true) + | |-- element: struct (containsNull = true) + | | |-- id: string (nullable = true) + | | |-- use: string (nullable = true) + | | |-- type: string (nullable = true) + | | |-- text: string (nullable = true) + | | |-- line: array (nullable = true) + | | | |-- element: string (containsNull = true) + | | |-- city: string (nullable = true) + | | |-- district: string (nullable = true) + | | |-- state: string (nullable = true) + | | |-- postalCode: string (nullable = true) + | | |-- country: string (nullable = true) + | | |-- period: struct (nullable = true) + | | | |-- id: string (nullable = true) + | | | |-- start: string (nullable = true) + | | | |-- end: string (nullable = true) + | | | |-- _fid: integer (nullable = true) + | | |-- _fid: integer (nullable = true) + |-- maritalStatus: struct (nullable = true) + | |-- id: string (nullable = true) + | |-- coding: array (nullable = true) + | | |-- element: struct (containsNull = true) + | | | |-- id: string (nullable = true) + | | | |-- system: string (nullable = true) + | | | |-- version: string (nullable = true) + | | | |-- code: string (nullable = true) + | | 
| |-- display: string (nullable = true) + | | | |-- userSelected: boolean (nullable = true) + | | | |-- _fid: integer (nullable = true) + | |-- text: string (nullable = true) + | |-- _fid: integer (nullable = true) + |-- multipleBirthBoolean: boolean (nullable = true) + |-- multipleBirthInteger: integer (nullable = true) + |-- photo: array (nullable = true) + | |-- element: struct (containsNull = true) + | | |-- id: string (nullable = true) + | | |-- contentType: string (nullable = true) + | | |-- language: string (nullable = true) + | | |-- data: binary (nullable = true) + | | |-- url: string (nullable = true) + | | |-- size: integer (nullable = true) + | | |-- hash: binary (nullable = true) + | | |-- title: string (nullable = true) + | | |-- creation: string (nullable = true) + | | |-- _fid: integer (nullable = true) + |-- contact: array (nullable = true) + | |-- element: struct (containsNull = true) + | | |-- id: string (nullable = true) + | | |-- relationship: array (nullable = true) + | | | |-- element: struct (containsNull = true) + | | | | |-- id: string (nullable = true) + | | | | |-- coding: array (nullable = true) + | | | | | |-- element: struct (containsNull = true) + | | | | | | |-- id: string (nullable = true) + | | | | | | |-- system: string (nullable = true) + | | | | | | |-- version: string (nullable = true) + | | | | | | |-- code: string (nullable = true) + | | | | | | |-- display: string (nullable = true) + | | | | | | |-- userSelected: boolean (nullable = true) + | | | | | | |-- _fid: integer (nullable = true) + | | | | |-- text: string (nullable = true) + | | | | |-- _fid: integer (nullable = true) + | | |-- name: struct (nullable = true) + | | | |-- id: string (nullable = true) + | | | |-- use: string (nullable = true) + | | | |-- text: string (nullable = true) + | | | |-- family: string (nullable = true) + | | | |-- given: array (nullable = true) + | | | | |-- element: string (containsNull = true) + | | | |-- prefix: array (nullable = true) + 
| | | | |-- element: string (containsNull = true) + | | | |-- suffix: array (nullable = true) + | | | | |-- element: string (containsNull = true) + | | | |-- period: struct (nullable = true) + | | | | |-- id: string (nullable = true) + | | | | |-- start: string (nullable = true) + | | | | |-- end: string (nullable = true) + | | | | |-- _fid: integer (nullable = true) + | | | |-- _fid: integer (nullable = true) + | | |-- telecom: array (nullable = true) + | | | |-- element: struct (containsNull = true) + | | | | |-- id: string (nullable = true) + | | | | |-- system: string (nullable = true) + | | | | |-- value: string (nullable = true) + | | | | |-- use: string (nullable = true) + | | | | |-- rank: integer (nullable = true) + | | | | |-- period: struct (nullable = true) + | | | | | |-- id: string (nullable = true) + | | | | | |-- start: string (nullable = true) + | | | | | |-- end: string (nullable = true) + | | | | | |-- _fid: integer (nullable = true) + | | | | |-- _fid: integer (nullable = true) + | | |-- address: struct (nullable = true) + | | | |-- id: string (nullable = true) + | | | |-- use: string (nullable = true) + | | | |-- type: string (nullable = true) + | | | |-- text: string (nullable = true) + | | | |-- line: array (nullable = true) + | | | | |-- element: string (containsNull = true) + | | | |-- city: string (nullable = true) + | | | |-- district: string (nullable = true) + | | | |-- state: string (nullable = true) + | | | |-- postalCode: string (nullable = true) + | | | |-- country: string (nullable = true) + | | | |-- period: struct (nullable = true) + | | | | |-- id: string (nullable = true) + | | | | |-- start: string (nullable = true) + | | | | |-- end: string (nullable = true) + | | | | |-- _fid: integer (nullable = true) + | | | |-- _fid: integer (nullable = true) + | | |-- gender: string (nullable = true) + | | |-- organization: struct (nullable = true) + | | | |-- reference: string (nullable = true) + | | | |-- display: string (nullable = 
true) + | | | |-- _fid: integer (nullable = true) + | | |-- period: struct (nullable = true) + | | | |-- id: string (nullable = true) + | | | |-- start: string (nullable = true) + | | | |-- end: string (nullable = true) + | | | |-- _fid: integer (nullable = true) + | | |-- _fid: integer (nullable = true) + |-- communication: array (nullable = true) + | |-- element: struct (containsNull = true) + | | |-- id: string (nullable = true) + | | |-- language: struct (nullable = true) + | | | |-- id: string (nullable = true) + | | | |-- coding: array (nullable = true) + | | | | |-- element: struct (containsNull = true) + | | | | | |-- id: string (nullable = true) + | | | | | |-- system: string (nullable = true) + | | | | | |-- version: string (nullable = true) + | | | | | |-- code: string (nullable = true) + | | | | | |-- display: string (nullable = true) + | | | | | |-- userSelected: boolean (nullable = true) + | | | | | |-- _fid: integer (nullable = true) + | | | |-- text: string (nullable = true) + | | | |-- _fid: integer (nullable = true) + | | |-- preferred: boolean (nullable = true) + | | |-- _fid: integer (nullable = true) + |-- generalPractitioner: array (nullable = true) + | |-- element: struct (containsNull = true) + | | |-- reference: string (nullable = true) + | | |-- display: string (nullable = true) + | | |-- _fid: integer (nullable = true) + |-- managingOrganization: struct (nullable = true) + | |-- reference: string (nullable = true) + | |-- display: string (nullable = true) + | |-- _fid: integer (nullable = true) + |-- link: array (nullable = true) + | |-- element: struct (containsNull = true) + | | |-- id: string (nullable = true) + | | |-- other: struct (nullable = true) + | | | |-- reference: string (nullable = true) + | | | |-- display: string (nullable = true) + | | | |-- _fid: integer (nullable = true) + | | |-- type: string (nullable = true) + | | |-- _fid: integer (nullable = true) + |-- _fid: integer (nullable = true) + |-- _extension: map 
(nullable = true) + | |-- key: integer + | |-- value: array (valueContainsNull = false) + | | |-- element: struct (containsNull = true) + | | | |-- id: string (nullable = true) + | | | |-- url: string (nullable = true) + | | | |-- valueAddress: struct (nullable = true) + | | | | |-- id: string (nullable = true) + | | | | |-- use: string (nullable = true) + | | | | |-- type: string (nullable = true) + | | | | |-- text: string (nullable = true) + | | | | |-- line: array (nullable = true) + | | | | | |-- element: string (containsNull = true) + | | | | |-- city: string (nullable = true) + | | | | |-- district: string (nullable = true) + | | | | |-- state: string (nullable = true) + | | | | |-- postalCode: string (nullable = true) + | | | | |-- country: string (nullable = true) + | | | | |-- period: struct (nullable = true) + | | | | | |-- id: string (nullable = true) + | | | | | |-- start: string (nullable = true) + | | | | | |-- end: string (nullable = true) + | | | | | |-- _fid: integer (nullable = true) + | | | | |-- _fid: integer (nullable = true) + | | | |-- valueBoolean: boolean (nullable = true) + | | | |-- valueCode: string (nullable = true) + | | | |-- valueCodeableConcept: struct (nullable = true) + | | | | |-- id: string (nullable = true) + | | | | |-- coding: array (nullable = true) + | | | | | |-- element: struct (containsNull = true) + | | | | | | |-- id: string (nullable = true) + | | | | | | |-- system: string (nullable = true) + | | | | | | |-- version: string (nullable = true) + | | | | | | |-- code: string (nullable = true) + | | | | | | |-- display: string (nullable = true) + | | | | | | |-- userSelected: boolean (nullable = true) + | | | | | | |-- _fid: integer (nullable = true) + | | | | |-- text: string (nullable = true) + | | | | |-- _fid: integer (nullable = true) + | | | |-- valueCoding: struct (nullable = true) + | | | | |-- id: string (nullable = true) + | | | | |-- system: string (nullable = true) + | | | | |-- version: string (nullable = 
true) + | | | | |-- code: string (nullable = true) + | | | | |-- display: string (nullable = true) + | | | | |-- userSelected: boolean (nullable = true) + | | | | |-- _fid: integer (nullable = true) + | | | |-- valueDateTime: string (nullable = true) + | | | |-- valueDate: string (nullable = true) + | | | |-- valueDecimal: decimal(32,6) (nullable = true) + | | | |-- valueDecimal_scale: integer (nullable = true) + | | | |-- valueIdentifier: struct (nullable = true) + | | | | |-- id: string (nullable = true) + | | | | |-- use: string (nullable = true) + | | | | |-- type: struct (nullable = true) + | | | | | |-- id: string (nullable = true) + | | | | | |-- coding: array (nullable = true) + | | | | | | |-- element: struct (containsNull = true) + | | | | | | | |-- id: string (nullable = true) + | | | | | | | |-- system: string (nullable = true) + | | | | | | | |-- version: string (nullable = true) + | | | | | | | |-- code: string (nullable = true) + | | | | | | | |-- display: string (nullable = true) + | | | | | | | |-- userSelected: boolean (nullable = true) + | | | | | | | |-- _fid: integer (nullable = true) + | | | | | |-- text: string (nullable = true) + | | | | | |-- _fid: integer (nullable = true) + | | | | |-- system: string (nullable = true) + | | | | |-- value: string (nullable = true) + | | | | |-- period: struct (nullable = true) + | | | | | |-- id: string (nullable = true) + | | | | | |-- start: string (nullable = true) + | | | | | |-- end: string (nullable = true) + | | | | | |-- _fid: integer (nullable = true) + | | | | |-- assigner: struct (nullable = true) + | | | | | |-- reference: string (nullable = true) + | | | | | |-- display: string (nullable = true) + | | | | | |-- _fid: integer (nullable = true) + | | | | |-- _fid: integer (nullable = true) + | | | |-- valueInteger: integer (nullable = true) + | | | |-- valueReference: struct (nullable = true) + | | | | |-- reference: string (nullable = true) + | | | | |-- display: string (nullable = true) + | | 
| | |-- _fid: integer (nullable = true) + | | | |-- valueString: string (nullable = true) + | | | |-- _fid: integer (nullable = true) +``` From 3b73234c570096cc07a64819db8ed25e0fbed88b Mon Sep 17 00:00:00 2001 From: John Grimes Date: Wed, 7 Feb 2024 14:47:35 +1000 Subject: [PATCH 003/175] Add Parquet specification to docs --- site/docs/libraries/encoders/schema.md | 1009 ++++++++++++++---------- 1 file changed, 575 insertions(+), 434 deletions(-) diff --git a/site/docs/libraries/encoders/schema.md b/site/docs/libraries/encoders/schema.md index 2cd29500c2..feab17946a 100644 --- a/site/docs/libraries/encoders/schema.md +++ b/site/docs/libraries/encoders/schema.md @@ -1,7 +1,7 @@ -# Schema specification +# Parquet specification -This specification describes a scheme for encoding resource definitions from the -FHIR specification as a Spark SQL schema. +This specification describes a scheme for representing FHIR resources within a +[Parquet](https://parquet.apache.org/) schema. The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", " SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be @@ -25,494 +25,635 @@ All R4 resource types are supported by this specification, except the following: ## Configuration options -There are several options that determine the encoding strategy. Schemas that -have been encoded according to this specification with the same configuration -values SHALL be identical and compatible. +There are several options that determine the structure of the schema. Any two +schemas that conform to this specification with the same configuration values +SHALL be identical and compatible. -| Option | Description | -|----------------------------|-----------------------------------------------------------| -| Maximum nesting level | The maximum depth of nested element data that is encoded. | -| Extension encoding enabled | Whether extension content is included within the schema. 
| -| Supported open types | The list of types that are encoded for open choice types. | +| Option | Description | +|-----------------------|-------------------------------------------------------------| +| Maximum nesting level | The maximum supported depth of nested element data. | +| Extensions enabled | Whether extension content is included within the schema. | +| Supported open types | The list of types that are supported for open choice types. | ## Out of scope -The following areas are not currently supported by this specification: +The following features are not currently supported by this specification: - Primitive extensions - Profiled resources -## Encoding strategy +## Schema structure -All columns and struct fields SHALL be encoded as nullable, unless otherwise -specified. +All fields SHALL be encoded as `OPTIONAL`, unless otherwise specified. All +groups SHALL be encoded as `REQUIRED`. ### Primitive elements -Each primitive element within a resource definition SHALL be encoded as a column -within the schema with the same name (except where otherwise specified). The -data type SHALL be determined by the element type according to the following +Each primitive element within a resource definition SHALL be represented as a +field within the schema with the same name (except where otherwise specified). 
+The data type SHALL be determined by the element type according to the following table: -| FHIR type | Spark SQL type | Additional requirements | -|-----------------|----------------|--------------------------------------------------------------------------------------------------| -| boolean | boolean | | -| canonical | string | Compliant with the [FHIR canonical format](https://hl7.org/fhir/R4/datatypes.html#canonical) | -| code | string | Compliant with the [FHIR code format](https://hl7.org/fhir/R4/datatypes.html#code) | -| dateTime | string | Compliant with the [FHIR dateTime format](https://hl7.org/fhir/R4/datatypes.html#dateTime) | -| date | string | Compliant with the [FHIR date format](https://hl7.org/fhir/R4/datatypes.html#date) | -| decimal | decimal(32,6) | See [Decimal encoding](#decimal-encoding) | -| id | string | See [ID encoding](#id-encoding) | -| instant | timestamp | | -| integer | integer | Compliant with the [FHIR integer format](https://hl7.org/fhir/R4/datatypes.html#integer) | -| markdown | string | Compliant with the [FHIR markdown format](https://hl7.org/fhir/R4/datatypes.html#markdown) | -| oid | string | Compliant with the [FHIR oid format](https://hl7.org/fhir/R4/datatypes.html#oid) | -| positiveInt | integer | Compliant with the [FHIR positiveInt format](https://hl7.org/fhir/R4/datatypes.html#positiveInt) | -| string | string | Compliant with the [FHIR string format](https://hl7.org/fhir/R4/datatypes.html#string) | -| time | string | Compliant with the [FHIR time format](https://hl7.org/fhir/R4/datatypes.html#time) | -| unsignedInt | integer | Compliant with the [FHIR unsignedInt format](https://hl7.org/fhir/R4/datatypes.html#unsignedInt) | -| uri | string | Compliant with the [FHIR uri format](https://hl7.org/fhir/R4/datatypes.html#uri) | -| url | string | Compliant with the [FHIR url format](https://hl7.org/fhir/R4/datatypes.html#url) | -| uuid | string | Compliant with the [FHIR uuid 
format](https://hl7.org/fhir/R4/datatypes.html#uuid) | - -#### Decimal encoding - -An element of type `decimal` SHALL be encoded as a decimal column with a -precision of 32 and a scale of 6. +| FHIR type | Spark SQL type | Additional requirements | +|-------------|----------------|----------------------------------------------------------------------------------------------| +| boolean | BOOLEAN | | +| canonical | BINARY (UTF8) | Compliant with the [FHIR canonical format](https://hl7.org/fhir/R4/datatypes.html#canonical) | +| code | BINARY (UTF8) | Compliant with the [FHIR code format](https://hl7.org/fhir/R4/datatypes.html#code) | +| dateTime | BINARY (UTF8) | Compliant with the [FHIR dateTime format](https://hl7.org/fhir/R4/datatypes.html#dateTime) | +| date | BINARY (UTF8) | Compliant with the [FHIR date format](https://hl7.org/fhir/R4/datatypes.html#date) | +| decimal | DECIMAL(32,6) | See [Decimal type](#decimal-type) | +| id | BINARY (UTF8) | See [ID type](#id-type) | +| instant | INT96 | | +| integer | INT32 | Compliant with the [FHIR integer format](https://hl7.org/fhir/R4/datatypes.html#integer) | +| markdown | BINARY (UTF8) | Compliant with the [FHIR markdown format](https://hl7.org/fhir/R4/datatypes.html#markdown) | +| oid | BINARY (UTF8) | Compliant with the [FHIR oid format](https://hl7.org/fhir/R4/datatypes.html#oid) | +| positiveInt | INT32 | Compliant with the [FHIR positiveInt format](https://hl7.org/fhir/R4/datatypes.html#positiveInt) | +| string | BINARY (UTF8) | Compliant with the [FHIR string format](https://hl7.org/fhir/R4/datatypes.html#string) | +| time | BINARY (UTF8) | Compliant with the [FHIR time format](https://hl7.org/fhir/R4/datatypes.html#time) | +| unsignedInt | INT32 | Compliant with the [FHIR unsignedInt format](https://hl7.org/fhir/R4/datatypes.html#unsignedInt) | +| uri | BINARY (UTF8) | Compliant with the [FHIR uri format](https://hl7.org/fhir/R4/datatypes.html#uri) | +| url | BINARY (UTF8) | Compliant with the [FHIR url
format](https://hl7.org/fhir/R4/datatypes.html#url) | +| uuid | BINARY (UTF8) | Compliant with the [FHIR uuid format](https://hl7.org/fhir/R4/datatypes.html#uuid) | -In addition to this column, an integer column SHALL be encoded with the -suffix `_scale`. This column SHALL contain the scale of the decimal value from -the original FHIR data. +### Complex and backbone elements -#### ID encoding +Each complex and backbone element within a resource definition SHALL be +represented as a group within the schema with the same name. The group SHALL +contain a field for each of the child elements. -An element of type `id` SHALL be encoded as a string column. This column SHALL -contain the FHIR resource logical ID. +Each field SHALL be represented as described in +the [Primitive elements](#primitive-elements) and [Choice types](#choice-types) +sections (or this section in the case of a nested complex or backbone element). -In addition to this column, a string column SHALL be encoded with the -suffix `_versioned`. This column SHALL contain a fully qualified logical ID, -including the resource type and the technical version. The data in this column -SHALL follow the format `[resource type]/[logical ID]/_history/[version]`. +If the "extensions enabled" option is set to true, an additional field +SHALL be included with the name `_fid`. This field SHALL have a type of `INT32`. +This field SHALL contain a value that uniquely identifies the complex or +backbone element within the resource. ### Choice types -If the choice type is open (i.e. a type of `*`), the struct SHALL contain a +If the choice type is open (i.e. a type of `*`), the group SHALL contain a field for each type listed in the configuration value "supported open types". -If the choice type is not open, it SHALL be encoded as a struct column with a +If the choice type is not open, it SHALL be represented as a group with a field for each of its valid types.
The name of each field SHALL follow the format `value[type]`, where `[type]` is the name of the type in upper camel case. -### Complex and backbone elements +### Decimal type -Each complex and backbone element within a resource definition SHALL be encoded -as a struct column within the schema with the same name. The struct SHALL -contain fields for each of the child elements of the complex or backbone -element. +An element of type `decimal` SHALL be represented as a `DECIMAL` field with a +precision of 32 and a scale of 6. -The encoding rules for each field SHALL follow the same rules as described in -the [primitive elements](#primitive-elements) and [choice types](#choice-types) -section (and this section in the case of nested complex or backbone elements). +In addition, an `INT32` field SHALL be included with the suffix `_scale`. This +field SHALL be used to store the scale of the decimal value from the original +FHIR data. -If the "extension encoding enabled" option is set to true, an additional column -SHALL be encoded with the name `_fid`. This column SHALL have a type of integer. -This column SHALL contain a value that uniquely identifies the complex or -backbone element within the resource. +### ID type + +An element of type `id` SHALL be represented as a `BINARY (UTF8)` field. This +field SHALL be used to store the FHIR resource logical ID. + +In addition to this field, a `BINARY (UTF8)` field SHALL be included with the +suffix `_versioned`. This field SHALL be used to store a fully qualified logical +ID that includes the resource type and the technical version. The data in this +field SHALL follow the format `[resource type]/[logical ID]/_history/[version]`. -#### Quantity encoding +### Quantity type -If the complex element is of +If a complex element is of type [Quantity](https://hl7.org/fhir/R4/datatypes.html#Quantity), an additional -two columns SHALL be encoded as part of the struct. 
These columns SHALL be +two fields SHALL be included as part of the group. These fields SHALL be named `_value_canonicalized` and `_code_canonicalized`. -The `_value_canonicalized` column SHALL be encoded as a struct with the +The `_value_canonicalized` field SHALL be encoded as a group with the following fields: - `value` with a type of decimal, precision 38 and scale 0. - `scale` with a type of integer. -The `_code_canonicalized` column SHALL be encoded as a string column. +The `_code_canonicalized` field SHALL be encoded as a string field. -Implementations loading data into this schema MAY use these columns to store -canonicalized versions of the `value` and `code` fields from the original FHIR -data, for easier comparison and querying. +These fields MAY be used to store canonicalized versions of the `value` +and `code` fields from the original FHIR data, for easier comparison and +querying. -### Extension encoding +### Extensions -If the "extension encoding enabled" option is set to true, an additional column -SHALL be encoded at the root of the schema named `_extension`. This column SHALL -have a type of map, with an integer key and an array value. +If the "extensions enabled" option is true, an additional field SHALL be +included at the root of the schema named `_extension`. This field SHALL have a +type of `MAP`, with an `INT32` key and a repeated group value. -The array SHALL contain a struct type that is encoded as +The group used for each value in the map SHALL be represented using the [Extension](https://hl7.org/fhir/R4/extensibility.html#extension) type, as described in [Complex and backbone elements](#complex-and-backbone-elements). -Implementations loading data into this schema SHALL use this column to store -extension data from the original FHIR data (except for primitive extensions). -The map SHALL contain a reference to the `_fid` of the element that the -extension is attached to. 
+If the "extensions enabled" option is true, this field SHALL be used to store +extension data from the original FHIR data (excluding primitive extensions). +Each key within the map SHALL contain a reference to the `_fid` of the element +that the extension is attached to. ## Example -The following schema is an example of the encoding of -the [Patient](https://hl7.org/fhir/R4/patient.html) resource type with the +The following schema is an example of how to represent +the [Patient](https://hl7.org/fhir/R4/patient.html) resource type using the following configuration values: - Maximum nesting level: `3` -- Extension encoding enabled: `true` +- Extensions enabled: `true` - Supported open types: `boolean`, `code`, `date`, `dateTime`, `decimal`, `integer`, `string`, `Coding`, `CodeableConcept`, `Address`, `Identifier`, `Reference` ``` -root - |-- id: string (nullable = true) - |-- id_versioned: string (nullable = true) - |-- meta: struct (nullable = true) - | |-- id: string (nullable = true) - | |-- versionId: string (nullable = true) - | |-- versionId_versioned: string (nullable = true) - | |-- lastUpdated: timestamp (nullable = true) - | |-- source: string (nullable = true) - | |-- profile: array (nullable = true) - | | |-- element: string (containsNull = true) - | |-- security: array (nullable = true) - | | |-- element: struct (containsNull = true) - | | | |-- id: string (nullable = true) - | | | |-- system: string (nullable = true) - | | | |-- version: string (nullable = true) - | | | |-- code: string (nullable = true) - | | | |-- display: string (nullable = true) - | | | |-- userSelected: boolean (nullable = true) - | | | |-- _fid: integer (nullable = true) - | |-- tag: array (nullable = true) - | | |-- element: struct (containsNull = true) - | | | |-- id: string (nullable = true) - | | | |-- system: string (nullable = true) - | | | |-- version: string (nullable = true) - | | | |-- code: string (nullable = true) - | | | |-- display: string (nullable = true) - | | | 
|-- userSelected: boolean (nullable = true) - | | | |-- _fid: integer (nullable = true) - | |-- _fid: integer (nullable = true) - |-- implicitRules: string (nullable = true) - |-- language: string (nullable = true) - |-- text: struct (nullable = true) - | |-- id: string (nullable = true) - | |-- status: string (nullable = true) - | |-- div: string (nullable = true) - | |-- _fid: integer (nullable = true) - |-- identifier: array (nullable = true) - | |-- element: struct (containsNull = true) - | | |-- id: string (nullable = true) - | | |-- use: string (nullable = true) - | | |-- type: struct (nullable = true) - | | | |-- id: string (nullable = true) - | | | |-- coding: array (nullable = true) - | | | | |-- element: struct (containsNull = true) - | | | | | |-- id: string (nullable = true) - | | | | | |-- system: string (nullable = true) - | | | | | |-- version: string (nullable = true) - | | | | | |-- code: string (nullable = true) - | | | | | |-- display: string (nullable = true) - | | | | | |-- userSelected: boolean (nullable = true) - | | | | | |-- _fid: integer (nullable = true) - | | | |-- text: string (nullable = true) - | | | |-- _fid: integer (nullable = true) - | | |-- system: string (nullable = true) - | | |-- value: string (nullable = true) - | | |-- period: struct (nullable = true) - | | | |-- id: string (nullable = true) - | | | |-- start: string (nullable = true) - | | | |-- end: string (nullable = true) - | | | |-- _fid: integer (nullable = true) - | | |-- assigner: struct (nullable = true) - | | | |-- reference: string (nullable = true) - | | | |-- display: string (nullable = true) - | | | |-- _fid: integer (nullable = true) - | | |-- _fid: integer (nullable = true) - |-- active: boolean (nullable = true) - |-- name: array (nullable = true) - | |-- element: struct (containsNull = true) - | | |-- id: string (nullable = true) - | | |-- use: string (nullable = true) - | | |-- text: string (nullable = true) - | | |-- family: string (nullable = true) - | | 
|-- given: array (nullable = true) - | | | |-- element: string (containsNull = true) - | | |-- prefix: array (nullable = true) - | | | |-- element: string (containsNull = true) - | | |-- suffix: array (nullable = true) - | | | |-- element: string (containsNull = true) - | | |-- period: struct (nullable = true) - | | | |-- id: string (nullable = true) - | | | |-- start: string (nullable = true) - | | | |-- end: string (nullable = true) - | | | |-- _fid: integer (nullable = true) - | | |-- _fid: integer (nullable = true) - |-- telecom: array (nullable = true) - | |-- element: struct (containsNull = true) - | | |-- id: string (nullable = true) - | | |-- system: string (nullable = true) - | | |-- value: string (nullable = true) - | | |-- use: string (nullable = true) - | | |-- rank: integer (nullable = true) - | | |-- period: struct (nullable = true) - | | | |-- id: string (nullable = true) - | | | |-- start: string (nullable = true) - | | | |-- end: string (nullable = true) - | | | |-- _fid: integer (nullable = true) - | | |-- _fid: integer (nullable = true) - |-- gender: string (nullable = true) - |-- birthDate: string (nullable = true) - |-- deceasedBoolean: boolean (nullable = true) - |-- deceasedDateTime: string (nullable = true) - |-- address: array (nullable = true) - | |-- element: struct (containsNull = true) - | | |-- id: string (nullable = true) - | | |-- use: string (nullable = true) - | | |-- type: string (nullable = true) - | | |-- text: string (nullable = true) - | | |-- line: array (nullable = true) - | | | |-- element: string (containsNull = true) - | | |-- city: string (nullable = true) - | | |-- district: string (nullable = true) - | | |-- state: string (nullable = true) - | | |-- postalCode: string (nullable = true) - | | |-- country: string (nullable = true) - | | |-- period: struct (nullable = true) - | | | |-- id: string (nullable = true) - | | | |-- start: string (nullable = true) - | | | |-- end: string (nullable = true) - | | | |-- _fid: 
integer (nullable = true) - | | |-- _fid: integer (nullable = true) - |-- maritalStatus: struct (nullable = true) - | |-- id: string (nullable = true) - | |-- coding: array (nullable = true) - | | |-- element: struct (containsNull = true) - | | | |-- id: string (nullable = true) - | | | |-- system: string (nullable = true) - | | | |-- version: string (nullable = true) - | | | |-- code: string (nullable = true) - | | | |-- display: string (nullable = true) - | | | |-- userSelected: boolean (nullable = true) - | | | |-- _fid: integer (nullable = true) - | |-- text: string (nullable = true) - | |-- _fid: integer (nullable = true) - |-- multipleBirthBoolean: boolean (nullable = true) - |-- multipleBirthInteger: integer (nullable = true) - |-- photo: array (nullable = true) - | |-- element: struct (containsNull = true) - | | |-- id: string (nullable = true) - | | |-- contentType: string (nullable = true) - | | |-- language: string (nullable = true) - | | |-- data: binary (nullable = true) - | | |-- url: string (nullable = true) - | | |-- size: integer (nullable = true) - | | |-- hash: binary (nullable = true) - | | |-- title: string (nullable = true) - | | |-- creation: string (nullable = true) - | | |-- _fid: integer (nullable = true) - |-- contact: array (nullable = true) - | |-- element: struct (containsNull = true) - | | |-- id: string (nullable = true) - | | |-- relationship: array (nullable = true) - | | | |-- element: struct (containsNull = true) - | | | | |-- id: string (nullable = true) - | | | | |-- coding: array (nullable = true) - | | | | | |-- element: struct (containsNull = true) - | | | | | | |-- id: string (nullable = true) - | | | | | | |-- system: string (nullable = true) - | | | | | | |-- version: string (nullable = true) - | | | | | | |-- code: string (nullable = true) - | | | | | | |-- display: string (nullable = true) - | | | | | | |-- userSelected: boolean (nullable = true) - | | | | | | |-- _fid: integer (nullable = true) - | | | | |-- text: 
string (nullable = true) - | | | | |-- _fid: integer (nullable = true) - | | |-- name: struct (nullable = true) - | | | |-- id: string (nullable = true) - | | | |-- use: string (nullable = true) - | | | |-- text: string (nullable = true) - | | | |-- family: string (nullable = true) - | | | |-- given: array (nullable = true) - | | | | |-- element: string (containsNull = true) - | | | |-- prefix: array (nullable = true) - | | | | |-- element: string (containsNull = true) - | | | |-- suffix: array (nullable = true) - | | | | |-- element: string (containsNull = true) - | | | |-- period: struct (nullable = true) - | | | | |-- id: string (nullable = true) - | | | | |-- start: string (nullable = true) - | | | | |-- end: string (nullable = true) - | | | | |-- _fid: integer (nullable = true) - | | | |-- _fid: integer (nullable = true) - | | |-- telecom: array (nullable = true) - | | | |-- element: struct (containsNull = true) - | | | | |-- id: string (nullable = true) - | | | | |-- system: string (nullable = true) - | | | | |-- value: string (nullable = true) - | | | | |-- use: string (nullable = true) - | | | | |-- rank: integer (nullable = true) - | | | | |-- period: struct (nullable = true) - | | | | | |-- id: string (nullable = true) - | | | | | |-- start: string (nullable = true) - | | | | | |-- end: string (nullable = true) - | | | | | |-- _fid: integer (nullable = true) - | | | | |-- _fid: integer (nullable = true) - | | |-- address: struct (nullable = true) - | | | |-- id: string (nullable = true) - | | | |-- use: string (nullable = true) - | | | |-- type: string (nullable = true) - | | | |-- text: string (nullable = true) - | | | |-- line: array (nullable = true) - | | | | |-- element: string (containsNull = true) - | | | |-- city: string (nullable = true) - | | | |-- district: string (nullable = true) - | | | |-- state: string (nullable = true) - | | | |-- postalCode: string (nullable = true) - | | | |-- country: string (nullable = true) - | | | |-- period: struct 
(nullable = true) - | | | | |-- id: string (nullable = true) - | | | | |-- start: string (nullable = true) - | | | | |-- end: string (nullable = true) - | | | | |-- _fid: integer (nullable = true) - | | | |-- _fid: integer (nullable = true) - | | |-- gender: string (nullable = true) - | | |-- organization: struct (nullable = true) - | | | |-- reference: string (nullable = true) - | | | |-- display: string (nullable = true) - | | | |-- _fid: integer (nullable = true) - | | |-- period: struct (nullable = true) - | | | |-- id: string (nullable = true) - | | | |-- start: string (nullable = true) - | | | |-- end: string (nullable = true) - | | | |-- _fid: integer (nullable = true) - | | |-- _fid: integer (nullable = true) - |-- communication: array (nullable = true) - | |-- element: struct (containsNull = true) - | | |-- id: string (nullable = true) - | | |-- language: struct (nullable = true) - | | | |-- id: string (nullable = true) - | | | |-- coding: array (nullable = true) - | | | | |-- element: struct (containsNull = true) - | | | | | |-- id: string (nullable = true) - | | | | | |-- system: string (nullable = true) - | | | | | |-- version: string (nullable = true) - | | | | | |-- code: string (nullable = true) - | | | | | |-- display: string (nullable = true) - | | | | | |-- userSelected: boolean (nullable = true) - | | | | | |-- _fid: integer (nullable = true) - | | | |-- text: string (nullable = true) - | | | |-- _fid: integer (nullable = true) - | | |-- preferred: boolean (nullable = true) - | | |-- _fid: integer (nullable = true) - |-- generalPractitioner: array (nullable = true) - | |-- element: struct (containsNull = true) - | | |-- reference: string (nullable = true) - | | |-- display: string (nullable = true) - | | |-- _fid: integer (nullable = true) - |-- managingOrganization: struct (nullable = true) - | |-- reference: string (nullable = true) - | |-- display: string (nullable = true) - | |-- _fid: integer (nullable = true) - |-- link: array (nullable = 
true) - | |-- element: struct (containsNull = true) - | | |-- id: string (nullable = true) - | | |-- other: struct (nullable = true) - | | | |-- reference: string (nullable = true) - | | | |-- display: string (nullable = true) - | | | |-- _fid: integer (nullable = true) - | | |-- type: string (nullable = true) - | | |-- _fid: integer (nullable = true) - |-- _fid: integer (nullable = true) - |-- _extension: map (nullable = true) - | |-- key: integer - | |-- value: array (valueContainsNull = false) - | | |-- element: struct (containsNull = true) - | | | |-- id: string (nullable = true) - | | | |-- url: string (nullable = true) - | | | |-- valueAddress: struct (nullable = true) - | | | | |-- id: string (nullable = true) - | | | | |-- use: string (nullable = true) - | | | | |-- type: string (nullable = true) - | | | | |-- text: string (nullable = true) - | | | | |-- line: array (nullable = true) - | | | | | |-- element: string (containsNull = true) - | | | | |-- city: string (nullable = true) - | | | | |-- district: string (nullable = true) - | | | | |-- state: string (nullable = true) - | | | | |-- postalCode: string (nullable = true) - | | | | |-- country: string (nullable = true) - | | | | |-- period: struct (nullable = true) - | | | | | |-- id: string (nullable = true) - | | | | | |-- start: string (nullable = true) - | | | | | |-- end: string (nullable = true) - | | | | | |-- _fid: integer (nullable = true) - | | | | |-- _fid: integer (nullable = true) - | | | |-- valueBoolean: boolean (nullable = true) - | | | |-- valueCode: string (nullable = true) - | | | |-- valueCodeableConcept: struct (nullable = true) - | | | | |-- id: string (nullable = true) - | | | | |-- coding: array (nullable = true) - | | | | | |-- element: struct (containsNull = true) - | | | | | | |-- id: string (nullable = true) - | | | | | | |-- system: string (nullable = true) - | | | | | | |-- version: string (nullable = true) - | | | | | | |-- code: string (nullable = true) - | | | | | | |-- 
display: string (nullable = true) - | | | | | | |-- userSelected: boolean (nullable = true) - | | | | | | |-- _fid: integer (nullable = true) - | | | | |-- text: string (nullable = true) - | | | | |-- _fid: integer (nullable = true) - | | | |-- valueCoding: struct (nullable = true) - | | | | |-- id: string (nullable = true) - | | | | |-- system: string (nullable = true) - | | | | |-- version: string (nullable = true) - | | | | |-- code: string (nullable = true) - | | | | |-- display: string (nullable = true) - | | | | |-- userSelected: boolean (nullable = true) - | | | | |-- _fid: integer (nullable = true) - | | | |-- valueDateTime: string (nullable = true) - | | | |-- valueDate: string (nullable = true) - | | | |-- valueDecimal: decimal(32,6) (nullable = true) - | | | |-- valueDecimal_scale: integer (nullable = true) - | | | |-- valueIdentifier: struct (nullable = true) - | | | | |-- id: string (nullable = true) - | | | | |-- use: string (nullable = true) - | | | | |-- type: struct (nullable = true) - | | | | | |-- id: string (nullable = true) - | | | | | |-- coding: array (nullable = true) - | | | | | | |-- element: struct (containsNull = true) - | | | | | | | |-- id: string (nullable = true) - | | | | | | | |-- system: string (nullable = true) - | | | | | | | |-- version: string (nullable = true) - | | | | | | | |-- code: string (nullable = true) - | | | | | | | |-- display: string (nullable = true) - | | | | | | | |-- userSelected: boolean (nullable = true) - | | | | | | | |-- _fid: integer (nullable = true) - | | | | | |-- text: string (nullable = true) - | | | | | |-- _fid: integer (nullable = true) - | | | | |-- system: string (nullable = true) - | | | | |-- value: string (nullable = true) - | | | | |-- period: struct (nullable = true) - | | | | | |-- id: string (nullable = true) - | | | | | |-- start: string (nullable = true) - | | | | | |-- end: string (nullable = true) - | | | | | |-- _fid: integer (nullable = true) - | | | | |-- assigner: struct 
(nullable = true) - | | | | | |-- reference: string (nullable = true) - | | | | | |-- display: string (nullable = true) - | | | | | |-- _fid: integer (nullable = true) - | | | | |-- _fid: integer (nullable = true) - | | | |-- valueInteger: integer (nullable = true) - | | | |-- valueReference: struct (nullable = true) - | | | | |-- reference: string (nullable = true) - | | | | |-- display: string (nullable = true) - | | | | |-- _fid: integer (nullable = true) - | | | |-- valueString: string (nullable = true) - | | | |-- _fid: integer (nullable = true) +message spark_schema { + optional binary id (STRING); + optional binary id_versioned (STRING); + optional group meta { + optional binary id (STRING); + optional binary versionId (STRING); + optional binary versionId_versioned (STRING); + optional int96 lastUpdated; + optional binary source (STRING); + optional group profile (LIST) { + repeated group list { + optional binary element (STRING); + } + } + optional group security (LIST) { + repeated group list { + optional group element { + optional binary id (STRING); + optional binary system (STRING); + optional binary version (STRING); + optional binary code (STRING); + optional binary display (STRING); + optional boolean userSelected; + optional int32 _fid; + } + } + } + optional group tag (LIST) { + repeated group list { + optional group element { + optional binary id (STRING); + optional binary system (STRING); + optional binary version (STRING); + optional binary code (STRING); + optional binary display (STRING); + optional boolean userSelected; + optional int32 _fid; + } + } + } + optional int32 _fid; + } + optional binary implicitRules (STRING); + optional binary language (STRING); + optional group text { + optional binary id (STRING); + optional binary status (STRING); + optional binary div (STRING); + optional int32 _fid; + } + optional group identifier (LIST) { + repeated group list { + optional group element { + optional binary id (STRING); + optional binary 
use (STRING); + optional group type { + optional binary id (STRING); + optional group coding (LIST) { + repeated group list { + optional group element { + optional binary id (STRING); + optional binary system (STRING); + optional binary version (STRING); + optional binary code (STRING); + optional binary display (STRING); + optional boolean userSelected; + optional int32 _fid; + } + } + } + optional binary text (STRING); + optional int32 _fid; + } + optional binary system (STRING); + optional binary value (STRING); + optional group period { + optional binary id (STRING); + optional binary start (STRING); + optional binary end (STRING); + optional int32 _fid; + } + optional group assigner { + optional binary reference (STRING); + optional binary display (STRING); + optional int32 _fid; + } + optional int32 _fid; + } + } + } + optional boolean active; + optional group name (LIST) { + repeated group list { + optional group element { + optional binary id (STRING); + optional binary use (STRING); + optional binary text (STRING); + optional binary family (STRING); + optional group given (LIST) { + repeated group list { + optional binary element (STRING); + } + } + optional group prefix (LIST) { + repeated group list { + optional binary element (STRING); + } + } + optional group suffix (LIST) { + repeated group list { + optional binary element (STRING); + } + } + optional group period { + optional binary id (STRING); + optional binary start (STRING); + optional binary end (STRING); + optional int32 _fid; + } + optional int32 _fid; + } + } + } + optional group telecom (LIST) { + repeated group list { + optional group element { + optional binary id (STRING); + optional binary system (STRING); + optional binary value (STRING); + optional binary use (STRING); + optional int32 rank; + optional group period { + optional binary id (STRING); + optional binary start (STRING); + optional binary end (STRING); + optional int32 _fid; + } + optional int32 _fid; + } + } + } + optional 
binary gender (STRING); + optional binary birthDate (STRING); + optional boolean deceasedBoolean; + optional binary deceasedDateTime (STRING); + optional group address (LIST) { + repeated group list { + optional group element { + optional binary id (STRING); + optional binary use (STRING); + optional binary type (STRING); + optional binary text (STRING); + optional group line (LIST) { + repeated group list { + optional binary element (STRING); + } + } + optional binary city (STRING); + optional binary district (STRING); + optional binary state (STRING); + optional binary postalCode (STRING); + optional binary country (STRING); + optional group period { + optional binary id (STRING); + optional binary start (STRING); + optional binary end (STRING); + optional int32 _fid; + } + optional int32 _fid; + } + } + } + optional group maritalStatus { + optional binary id (STRING); + optional group coding (LIST) { + repeated group list { + optional group element { + optional binary id (STRING); + optional binary system (STRING); + optional binary version (STRING); + optional binary code (STRING); + optional binary display (STRING); + optional boolean userSelected; + optional int32 _fid; + } + } + } + optional binary text (STRING); + optional int32 _fid; + } + optional boolean multipleBirthBoolean; + optional int32 multipleBirthInteger; + optional group photo (LIST) { + repeated group list { + optional group element { + optional binary id (STRING); + optional binary contentType (STRING); + optional binary language (STRING); + optional binary data; + optional binary url (STRING); + optional int32 size; + optional binary hash; + optional binary title (STRING); + optional binary creation (STRING); + optional int32 _fid; + } + } + } + optional group contact (LIST) { + repeated group list { + optional group element { + optional binary id (STRING); + optional group relationship (LIST) { + repeated group list { + optional group element { + optional binary id (STRING); + optional 
group coding (LIST) { + repeated group list { + optional group element { + optional binary id (STRING); + optional binary system (STRING); + optional binary version (STRING); + optional binary code (STRING); + optional binary display (STRING); + optional boolean userSelected; + optional int32 _fid; + } + } + } + optional binary text (STRING); + optional int32 _fid; + } + } + } + optional group name { + optional binary id (STRING); + optional binary use (STRING); + optional binary text (STRING); + optional binary family (STRING); + optional group given (LIST) { + repeated group list { + optional binary element (STRING); + } + } + optional group prefix (LIST) { + repeated group list { + optional binary element (STRING); + } + } + optional group suffix (LIST) { + repeated group list { + optional binary element (STRING); + } + } + optional group period { + optional binary id (STRING); + optional binary start (STRING); + optional binary end (STRING); + optional int32 _fid; + } + optional int32 _fid; + } + optional group telecom (LIST) { + repeated group list { + optional group element { + optional binary id (STRING); + optional binary system (STRING); + optional binary value (STRING); + optional binary use (STRING); + optional int32 rank; + optional group period { + optional binary id (STRING); + optional binary start (STRING); + optional binary end (STRING); + optional int32 _fid; + } + optional int32 _fid; + } + } + } + optional group address { + optional binary id (STRING); + optional binary use (STRING); + optional binary type (STRING); + optional binary text (STRING); + optional group line (LIST) { + repeated group list { + optional binary element (STRING); + } + } + optional binary city (STRING); + optional binary district (STRING); + optional binary state (STRING); + optional binary postalCode (STRING); + optional binary country (STRING); + optional group period { + optional binary id (STRING); + optional binary start (STRING); + optional binary end (STRING); + 
optional int32 _fid; + } + optional int32 _fid; + } + optional binary gender (STRING); + optional group organization { + optional binary reference (STRING); + optional binary display (STRING); + optional int32 _fid; + } + optional group period { + optional binary id (STRING); + optional binary start (STRING); + optional binary end (STRING); + optional int32 _fid; + } + optional int32 _fid; + } + } + } + optional group communication (LIST) { + repeated group list { + optional group element { + optional binary id (STRING); + optional group language { + optional binary id (STRING); + optional group coding (LIST) { + repeated group list { + optional group element { + optional binary id (STRING); + optional binary system (STRING); + optional binary version (STRING); + optional binary code (STRING); + optional binary display (STRING); + optional boolean userSelected; + optional int32 _fid; + } + } + } + optional binary text (STRING); + optional int32 _fid; + } + optional boolean preferred; + optional int32 _fid; + } + } + } + optional group generalPractitioner (LIST) { + repeated group list { + optional group element { + optional binary reference (STRING); + optional binary display (STRING); + optional int32 _fid; + } + } + } + optional group managingOrganization { + optional binary reference (STRING); + optional binary display (STRING); + optional int32 _fid; + } + optional group link (LIST) { + repeated group list { + optional group element { + optional binary id (STRING); + optional group other { + optional binary reference (STRING); + optional binary display (STRING); + optional int32 _fid; + } + optional binary type (STRING); + optional int32 _fid; + } + } + } + optional int32 _fid; + optional group _extension (MAP) { + repeated group key_value { + required int32 key; + required group value (LIST) { + repeated group list { + optional group element { + optional binary id (STRING); + optional binary url (STRING); + optional group valueAddress { + optional binary id 
(STRING); + optional binary use (STRING); + optional binary type (STRING); + optional binary text (STRING); + optional group line (LIST) { + repeated group list { + optional binary element (STRING); + } + } + optional binary city (STRING); + optional binary district (STRING); + optional binary state (STRING); + optional binary postalCode (STRING); + optional binary country (STRING); + optional group period { + optional binary id (STRING); + optional binary start (STRING); + optional binary end (STRING); + optional int32 _fid; + } + optional int32 _fid; + } + optional boolean valueBoolean; + optional binary valueCode (STRING); + optional group valueCodeableConcept { + optional binary id (STRING); + optional group coding (LIST) { + repeated group list { + optional group element { + optional binary id (STRING); + optional binary system (STRING); + optional binary version (STRING); + optional binary code (STRING); + optional binary display (STRING); + optional boolean userSelected; + optional int32 _fid; + } + } + } + optional binary text (STRING); + optional int32 _fid; + } + optional group valueCoding { + optional binary id (STRING); + optional binary system (STRING); + optional binary version (STRING); + optional binary code (STRING); + optional binary display (STRING); + optional boolean userSelected; + optional int32 _fid; + } + optional binary valueDateTime (STRING); + optional binary valueDate (STRING); + optional fixed_len_byte_array(14) valueDecimal (DECIMAL(32,6)); + optional int32 valueDecimal_scale; + optional group valueIdentifier { + optional binary id (STRING); + optional binary use (STRING); + optional group type { + optional binary id (STRING); + optional group coding (LIST) { + repeated group list { + optional group element { + optional binary id (STRING); + optional binary system (STRING); + optional binary version (STRING); + optional binary code (STRING); + optional binary display (STRING); + optional boolean userSelected; + optional int32 _fid; + 
} + } + } + optional binary text (STRING); + optional int32 _fid; + } + optional binary system (STRING); + optional binary value (STRING); + optional group period { + optional binary id (STRING); + optional binary start (STRING); + optional binary end (STRING); + optional int32 _fid; + } + optional group assigner { + optional binary reference (STRING); + optional binary display (STRING); + optional int32 _fid; + } + optional int32 _fid; + } + optional int32 valueInteger; + optional group valueReference { + optional binary reference (STRING); + optional binary display (STRING); + optional int32 _fid; + } + optional binary valueString (STRING); + optional int32 _fid; + } + } + } + } + } +} ``` From 92901c4a2ad7555e626f523effd8b53f252e7d25 Mon Sep 17 00:00:00 2001 From: John Grimes Date: Wed, 7 Feb 2024 14:50:34 +1000 Subject: [PATCH 004/175] Fix cut off words within primitive element type table --- site/docs/libraries/encoders/schema.md | 40 +++++++++++++------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/site/docs/libraries/encoders/schema.md b/site/docs/libraries/encoders/schema.md index feab17946a..0c24f9d302 100644 --- a/site/docs/libraries/encoders/schema.md +++ b/site/docs/libraries/encoders/schema.md @@ -54,26 +54,26 @@ field within the schema with the same name (except where otherwise specified). 
The data type SHALL be determined by the element type according to the following table: -| FHIR type | Spark SQL type | Additional requirements | -|-------------|----------------|----------------------------------------------------------------------------------------------| -| boolean | BOOLEAN | | -| canonical | BINARY (UTF8) | Compliant with the [FHIR canonical format](https://hl7.org/fhir/R4/datatypes.html#canonical) | -| code | BINARY (UTF8) | Compliant with the [FHIR code format](https://hl7.org/fhir/R4/datatypes.html#code) | -| dateTime | BINARY (UTF8) | Compliant with the [FHIR dateTime format](https://hl7.org/fhir/R4/datatypes.html#dateTime) | -| date | BINARY (UTF8) | Compliant with the [FHIR date format](https://hl7.org/fhir/R4/datatypes.html#date) | -| decimal | DECIMAL(32,6) | See [Decimal type](#decimal-type) | -| id | BINARY (UTF8) | See [ID type](#id-type) | -| instant | INT96 | | -| integer | INT32 | Compliant with the [FHIR integer format](https://hl7.org/fhir/R4/datatypes.html#integer) | -| markdown | BINARY (UTF8) | Compliant with the [FHIR markdown format](https://hl7.org/fhir/R4/datatypes.html#markdown) | -| oid | BINARY (UTF8) | Compliant with the [FHIR oid format](https://hl7.org/fhir/R4/datatypes.html#oid) | -| positiveInt | INT32 | liant with the [FHIR positiveInt format](https://hl7.org/fhir/R4/datatypes.html#positiveInt) | -| string | BINARY (UTF8) | Compliant with the [FHIR string format](https://hl7.org/fhir/R4/datatypes.html#string) | -| time | BINARY (UTF8) | Compliant with the [FHIR time format](https://hl7.org/fhir/R4/datatypes.html#time) | -| unsignedInt | INT32 | liant with the [FHIR unsignedInt format](https://hl7.org/fhir/R4/datatypes.html#unsignedInt) | -| uri | BINARY (UTF8) | Compliant with the [FHIR uri format](https://hl7.org/fhir/R4/datatypes.html#uri) | -| url | BINARY (UTF8) | Compliant with the [FHIR url format](https://hl7.org/fhir/R4/datatypes.html#url) | -| uuid | BINARY (UTF8) | Compliant with the [FHIR uuid 
format](https://hl7.org/fhir/R4/datatypes.html#uuid) | +| FHIR type | Spark SQL type | Additional requirements | +|-------------|----------------|--------------------------------------------------------------------------------------------------| +| boolean | BOOLEAN | | +| canonical | BINARY (UTF8) | Compliant with the [FHIR canonical format](https://hl7.org/fhir/R4/datatypes.html#canonical) | +| code | BINARY (UTF8) | Compliant with the [FHIR code format](https://hl7.org/fhir/R4/datatypes.html#code) | +| dateTime | BINARY (UTF8) | Compliant with the [FHIR dateTime format](https://hl7.org/fhir/R4/datatypes.html#dateTime) | +| date | BINARY (UTF8) | Compliant with the [FHIR date format](https://hl7.org/fhir/R4/datatypes.html#date) | +| decimal | DECIMAL(32,6) | See [Decimal type](#decimal-type) | +| id | BINARY (UTF8) | See [ID type](#id-type) | +| instant | INT96 | | +| integer | INT32 | Compliant with the [FHIR integer format](https://hl7.org/fhir/R4/datatypes.html#integer) | +| markdown | BINARY (UTF8) | Compliant with the [FHIR markdown format](https://hl7.org/fhir/R4/datatypes.html#markdown) | +| oid | BINARY (UTF8) | Compliant with the [FHIR oid format](https://hl7.org/fhir/R4/datatypes.html#oid) | +| positiveInt | INT32 | Compliant with the [FHIR positiveInt format](https://hl7.org/fhir/R4/datatypes.html#positiveInt) | +| string | BINARY (UTF8) | Compliant with the [FHIR string format](https://hl7.org/fhir/R4/datatypes.html#string) | +| time | BINARY (UTF8) | Compliant with the [FHIR time format](https://hl7.org/fhir/R4/datatypes.html#time) | +| unsignedInt | INT32 | Compliant with the [FHIR unsignedInt format](https://hl7.org/fhir/R4/datatypes.html#unsignedInt) | +| uri | BINARY (UTF8) | Compliant with the [FHIR uri format](https://hl7.org/fhir/R4/datatypes.html#uri) | +| url | BINARY (UTF8) | Compliant with the [FHIR url format](https://hl7.org/fhir/R4/datatypes.html#url) | +| uuid | BINARY (UTF8) | Compliant with the [FHIR uuid 
format](https://hl7.org/fhir/R4/datatypes.html#uuid) | ### Complex and backbone elements From 23a11222aa0db4860e1310fa72ab95c997be89f1 Mon Sep 17 00:00:00 2001 From: John Grimes Date: Wed, 7 Feb 2024 14:53:10 +1000 Subject: [PATCH 005/175] Add contained resources to out of scope --- site/docs/libraries/encoders/schema.md | 1 + 1 file changed, 1 insertion(+) diff --git a/site/docs/libraries/encoders/schema.md b/site/docs/libraries/encoders/schema.md index 0c24f9d302..98ee842e1a 100644 --- a/site/docs/libraries/encoders/schema.md +++ b/site/docs/libraries/encoders/schema.md @@ -41,6 +41,7 @@ The following features are not currently supported by this specification: - Primitive extensions - Profiled resources +- Contained resources ## Schema structure From 9cfba9ddbd8a2c9c2f28c5f476b40a57ef900bfe Mon Sep 17 00:00:00 2001 From: John Grimes Date: Wed, 7 Feb 2024 16:26:14 +1000 Subject: [PATCH 006/175] Fix link on Kafka page --- site/docs/libraries/kafka.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/docs/libraries/kafka.md b/site/docs/libraries/kafka.md index 9848f71290..48a29d7027 100644 --- a/site/docs/libraries/kafka.md +++ b/site/docs/libraries/kafka.md @@ -49,4 +49,4 @@ result = med_administrations.select( ``` For more information about Spark's Kafka integration, see -the [Structured Streaming + Kafka Integration Guide](https://spark.apache.org/docs/3.3.1/structured-streaming-kafka-integration.html#content). +the [Structured Streaming + Kafka Integration Guide](https://spark.apache.org/docs/latest/structured-streaming-kafka-integration.html). 
From 615d27d8ea83143520cfc7005a802322e55718a3 Mon Sep 17 00:00:00 2001 From: John Grimes Date: Wed, 7 Feb 2024 17:01:32 +1000 Subject: [PATCH 007/175] Add meta descriptions and social preview image to site --- site/docs/index.md | 4 ++++ site/docs/libraries/encoders/index.md | 1 + site/docs/libraries/encoders/schema.md | 4 ++++ site/docs/libraries/fhirpath-query.md | 1 + site/docs/libraries/index.md | 1 + site/docs/libraries/installation/databricks.md | 1 + site/docs/libraries/installation/index.md | 1 + site/docs/libraries/installation/spark.md | 1 + site/docs/libraries/installation/windows.md | 1 + site/docs/libraries/javascript/pathling-client.md | 4 ++++ site/docs/libraries/javascript/pathling-import.md | 4 ++++ site/docs/libraries/kafka.md | 1 + site/docs/libraries/terminology.md | 1 + site/docs/roadmap.md | 4 ++++ site/docs/server/async.md | 1 + site/docs/server/authorization.md | 1 + site/docs/server/caching.md | 1 + site/docs/server/configuration.md | 1 + site/docs/server/getting-started.md | 1 + site/docs/server/index.md | 1 + site/docs/server/kubernetes.md | 1 + site/docs/server/operations/aggregate.md | 1 + site/docs/server/operations/extract.md | 1 + site/docs/server/operations/import.md | 1 + site/docs/server/operations/search.md | 1 + site/docs/server/operations/update.md | 1 + site/docs/server/sync.md | 1 + site/docusaurus.config.js | 3 ++- 28 files changed, 44 insertions(+), 1 deletion(-) diff --git a/site/docs/index.md b/site/docs/index.md index 6fa3852ffd..e4c19df380 100644 --- a/site/docs/index.md +++ b/site/docs/index.md @@ -1,3 +1,7 @@ +--- +description: Pathling is a set of tools that make it easier to use FHIR and clinical terminology within health data analytics. 
+--- + # Overview Pathling is a set of tools that make it easier to diff --git a/site/docs/libraries/encoders/index.md b/site/docs/libraries/encoders/index.md index 2eccc88a9a..11a6ea7b91 100644 --- a/site/docs/libraries/encoders/index.md +++ b/site/docs/libraries/encoders/index.md @@ -1,5 +1,6 @@ --- sidebar_position: 2 +description: The Pathling library can be used to transform FHIR Bundles or NDJSON into Spark data sets. --- # FHIR encoders diff --git a/site/docs/libraries/encoders/schema.md b/site/docs/libraries/encoders/schema.md index 98ee842e1a..e2cac84a95 100644 --- a/site/docs/libraries/encoders/schema.md +++ b/site/docs/libraries/encoders/schema.md @@ -1,3 +1,7 @@ +--- +description: A scheme for representing FHIR resources within a Parquet schema. +--- + # Parquet specification This specification describes a scheme for representing FHIR resources within a diff --git a/site/docs/libraries/fhirpath-query.md b/site/docs/libraries/fhirpath-query.md index 754237c312..a206dae8df 100644 --- a/site/docs/libraries/fhirpath-query.md +++ b/site/docs/libraries/fhirpath-query.md @@ -1,5 +1,6 @@ --- sidebar_position: 3 +description: The Pathling library can be used to query datasets of FHIR resources using FHIRPath. This is useful for aggregating data, and creating custom views. --- # FHIRPath query diff --git a/site/docs/libraries/index.md b/site/docs/libraries/index.md index 8390d93f36..8cfe967fbc 100644 --- a/site/docs/libraries/index.md +++ b/site/docs/libraries/index.md @@ -1,6 +1,7 @@ --- sidebar_position: 1 sidebar_label: Introduction +description: Pathling provides a set of libraries that provide assistance with using FHIR and terminology services from Apache Spark applications and data science workflows. 
--- # Libraries diff --git a/site/docs/libraries/installation/databricks.md b/site/docs/libraries/installation/databricks.md index f28d856673..71f9dea873 100644 --- a/site/docs/libraries/installation/databricks.md +++ b/site/docs/libraries/installation/databricks.md @@ -1,5 +1,6 @@ --- sidebar_position: 3 +description: Instructions for installing the Pathling library on a Databricks cluster. --- # Databricks installation diff --git a/site/docs/libraries/installation/index.md b/site/docs/libraries/installation/index.md index 5f437a2e48..fbf42eca27 100644 --- a/site/docs/libraries/installation/index.md +++ b/site/docs/libraries/installation/index.md @@ -2,6 +2,7 @@ sidebar_position: 1 title: Installation sidebar_label: Installation +description: Instructions for installing the Pathling libraries for Python, R, Scala, and Java. --- ### Python diff --git a/site/docs/libraries/installation/spark.md b/site/docs/libraries/installation/spark.md index 3ad8d7edb2..45cd74d4cf 100644 --- a/site/docs/libraries/installation/spark.md +++ b/site/docs/libraries/installation/spark.md @@ -1,5 +1,6 @@ --- sidebar_position: 4 +description: Instructions for configuring Apache Spark to use the Pathling library. --- # Spark configuration diff --git a/site/docs/libraries/installation/windows.md b/site/docs/libraries/installation/windows.md index 62d83d92b2..7f70b98f7b 100644 --- a/site/docs/libraries/installation/windows.md +++ b/site/docs/libraries/installation/windows.md @@ -1,5 +1,6 @@ --- sidebar_position: 2 +description: Instructions for installing the Pathling libraries on Windows. --- # Windows installation diff --git a/site/docs/libraries/javascript/pathling-client.md b/site/docs/libraries/javascript/pathling-client.md index a785487c85..b9a15f00f1 100644 --- a/site/docs/libraries/javascript/pathling-client.md +++ b/site/docs/libraries/javascript/pathling-client.md @@ -1,3 +1,7 @@ +--- +description: A client library for the Pathling FHIR API, for use with JavaScript and TypeScript. 
+--- + # pathling-client [pathling-client](https://www.npmjs.com/package/pathling-client) is a client diff --git a/site/docs/libraries/javascript/pathling-import.md b/site/docs/libraries/javascript/pathling-import.md index c6d3617248..e4dc3df651 100644 --- a/site/docs/libraries/javascript/pathling-import.md +++ b/site/docs/libraries/javascript/pathling-import.md @@ -1,3 +1,7 @@ +--- +description: A set of functions written in TypeScript that facilitate the export of resources from a FHIR server and import into Pathling, via a staging S3 bucket. They can also be used as AWS Lambda functions. +--- + # pathling-import [pathling-import](https://www.npmjs.com/package/pathling-import) is a set of diff --git a/site/docs/libraries/kafka.md b/site/docs/libraries/kafka.md index 48a29d7027..1a597b33af 100644 --- a/site/docs/libraries/kafka.md +++ b/site/docs/libraries/kafka.md @@ -1,5 +1,6 @@ --- sidebar_position: 7 +description: Pathling supports Kafka as a streaming data source, and all the operations available within the library are able to execute continuously across a stream of data. --- # Kafka integration diff --git a/site/docs/libraries/terminology.md b/site/docs/libraries/terminology.md index c51505cac0..2b5223ebc9 100644 --- a/site/docs/libraries/terminology.md +++ b/site/docs/libraries/terminology.md @@ -1,5 +1,6 @@ --- sidebar_position: 4 +description: The Pathling library provides a set of functions for querying a FHIR terminology server from within your queries and transformations. --- # Terminology functions diff --git a/site/docs/roadmap.md b/site/docs/roadmap.md index 521039ad75..2072211426 100644 --- a/site/docs/roadmap.md +++ b/site/docs/roadmap.md @@ -1,3 +1,7 @@ +--- +description: We are continually adding new features to the various different components of Pathling. 
+--- + # Roadmap We are continually adding new features to the various different components of diff --git a/site/docs/server/async.md b/site/docs/server/async.md index 5e5f8e5c8d..c57db31187 100644 --- a/site/docs/server/async.md +++ b/site/docs/server/async.md @@ -1,6 +1,7 @@ --- sidebar_position: 7 sidebar_label: Async +description: Pathling implements the Asynchronous Request Pattern within the FHIR specification, to provide a way to execute long-running requests and check on their progress using a status endpoint. --- # Asynchronous processing diff --git a/site/docs/server/authorization.md b/site/docs/server/authorization.md index ffc6c60ac7..1355a51327 100644 --- a/site/docs/server/authorization.md +++ b/site/docs/server/authorization.md @@ -1,5 +1,6 @@ --- sidebar_position: 6 +description: Pathling can perform the role of a resource server within the OpenID Connect framework. --- # Authorization diff --git a/site/docs/server/caching.md b/site/docs/server/caching.md index f1a10000a6..be509c62c0 100644 --- a/site/docs/server/caching.md +++ b/site/docs/server/caching.md @@ -1,5 +1,6 @@ --- sidebar_position: 8 +description: Pathling implements ETag-based cache validation, which enables clients to skip processing of queries when the underlying data has not changed. --- # Caching diff --git a/site/docs/server/configuration.md b/site/docs/server/configuration.md index a045cefa49..3988ab13e1 100644 --- a/site/docs/server/configuration.md +++ b/site/docs/server/configuration.md @@ -1,5 +1,6 @@ --- sidebar_position: 5 +description: Configuration options for the Pathling server. --- # Configuration diff --git a/site/docs/server/getting-started.md b/site/docs/server/getting-started.md index 988d594252..0b3c293984 100644 --- a/site/docs/server/getting-started.md +++ b/site/docs/server/getting-started.md @@ -1,5 +1,6 @@ --- sidebar_position: 2 +description: Instructions for getting started with Pathling server. 
--- # Getting started diff --git a/site/docs/server/index.md b/site/docs/server/index.md index d684e2d95d..01074af4bd 100644 --- a/site/docs/server/index.md +++ b/site/docs/server/index.md @@ -1,6 +1,7 @@ --- sidebar_position: 1 sidebar_label: Introduction +description: Pathling provides a server based on the HL7 FHIR standard, implementing special functionality designed to ease the delivery of apps and augment tasks related to health data analytics. --- # Server diff --git a/site/docs/server/kubernetes.md b/site/docs/server/kubernetes.md index d779d8c841..8b4fcc2028 100644 --- a/site/docs/server/kubernetes.md +++ b/site/docs/server/kubernetes.md @@ -1,6 +1,7 @@ --- sidebar_position: 10 sidebar_label: Kubernetes +description: Instructions for deploying Pathling server on Kubernetes using Helm. --- # Kubernetes diff --git a/site/docs/server/operations/aggregate.md b/site/docs/server/operations/aggregate.md index be66d2dd01..a580236f60 100644 --- a/site/docs/server/operations/aggregate.md +++ b/site/docs/server/operations/aggregate.md @@ -1,5 +1,6 @@ --- sidebar_position: 3 +description: The aggregate operation allows a user to perform aggregate queries on data held within the Pathling FHIR server. --- # Aggregate diff --git a/site/docs/server/operations/extract.md b/site/docs/server/operations/extract.md index b098c8b7fc..54edce7bda 100644 --- a/site/docs/server/operations/extract.md +++ b/site/docs/server/operations/extract.md @@ -1,5 +1,6 @@ --- sidebar_position: 4 +description: The extract operation allows a user to create arbitrary tabular extracts from FHIR data, by specifying columns in terms of a set of FHIRPath expressions that are used to populate them. 
--- # Extract diff --git a/site/docs/server/operations/import.md b/site/docs/server/operations/import.md index 37c9f560b0..ae5238e95b 100644 --- a/site/docs/server/operations/import.md +++ b/site/docs/server/operations/import.md @@ -1,5 +1,6 @@ --- sidebar_position: 1 +description: The import operation allows FHIR data to be imported into the server, making it available for query via other operations such as search, aggregate and extract. --- # Import diff --git a/site/docs/server/operations/search.md b/site/docs/server/operations/search.md index 79efcad00e..37498be9c0 100644 --- a/site/docs/server/operations/search.md +++ b/site/docs/server/operations/search.md @@ -1,5 +1,6 @@ --- sidebar_position: 2 +description: Pathling server supports a FHIRPath-based search profile that allows you to retrieve a set of FHIR resources from the server filtered by one or more FHIRPath expressions. --- # Search diff --git a/site/docs/server/operations/update.md b/site/docs/server/operations/update.md index 5e392e2331..1c9e544f72 100644 --- a/site/docs/server/operations/update.md +++ b/site/docs/server/operations/update.md @@ -1,5 +1,6 @@ --- sidebar_position: 5 +description: Pathling implements the update and batch operations from the FHIR REST API, to allow for the creation and update of individual resources within the server. --- # Update and batch diff --git a/site/docs/server/sync.md b/site/docs/server/sync.md index d362eed58d..f3a58e86d3 100644 --- a/site/docs/server/sync.md +++ b/site/docs/server/sync.md @@ -1,6 +1,7 @@ --- sidebar_position: 9 sidebar_label: Synchronization +description: Synchronize Pathling server with other FHIR servers using subscriptions or bulk data export. 
--- # Synchronization with other FHIR servers diff --git a/site/docusaurus.config.js b/site/docusaurus.config.js index 9fdc6ba3b3..f66e05f42a 100644 --- a/site/docusaurus.config.js +++ b/site/docusaurus.config.js @@ -95,7 +95,8 @@ const config = { theme: lightCodeTheme, darkTheme: darkCodeTheme, additionalLanguages: ["java", "scala", "yaml", "docker", "r"] - } + }, + image: "/assets/images/social-preview.png", }) }; From 629d62cb22a175417c2a467a6593e401e947ccd3 Mon Sep 17 00:00:00 2001 From: John Grimes Date: Thu, 8 Feb 2024 06:32:54 +1000 Subject: [PATCH 008/175] Change Java and Scala installation advice to use library-runtime --- site/docs/libraries/installation/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/site/docs/libraries/installation/index.md b/site/docs/libraries/installation/index.md index fbf42eca27..6e51a34852 100644 --- a/site/docs/libraries/installation/index.md +++ b/site/docs/libraries/installation/index.md @@ -39,7 +39,7 @@ To add the Pathling library to your project, add the following to your [SBT](https://www.scala-sbt.org/) configuration: ```scala -libraryDependencies += "au.csiro.pathling" % "library-api" % "[version]" +libraryDependencies += "au.csiro.pathling" % "library-runtime" % "[version]" ``` ### Java @@ -50,7 +50,7 @@ your `pom.xml`: ```xml au.csiro.pathling - library-api + library-runtime [version] ``` From b72d032c07c9cf21bc872452eda7a8ee1d2833b0 Mon Sep 17 00:00:00 2001 From: John Grimes Date: Thu, 8 Feb 2024 09:41:43 +1000 Subject: [PATCH 009/175] Fix heading on primitive element type table --- site/docs/libraries/encoders/schema.md | 40 +++++++++++++------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/site/docs/libraries/encoders/schema.md b/site/docs/libraries/encoders/schema.md index e2cac84a95..bdf1eadd61 100644 --- a/site/docs/libraries/encoders/schema.md +++ b/site/docs/libraries/encoders/schema.md @@ -59,26 +59,26 @@ field within the schema with the same name (except 
where otherwise specified). The data type SHALL be determined by the element type according to the following table: -| FHIR type | Spark SQL type | Additional requirements | -|-------------|----------------|--------------------------------------------------------------------------------------------------| -| boolean | BOOLEAN | | -| canonical | BINARY (UTF8) | Compliant with the [FHIR canonical format](https://hl7.org/fhir/R4/datatypes.html#canonical) | -| code | BINARY (UTF8) | Compliant with the [FHIR code format](https://hl7.org/fhir/R4/datatypes.html#code) | -| dateTime | BINARY (UTF8) | Compliant with the [FHIR dateTime format](https://hl7.org/fhir/R4/datatypes.html#dateTime) | -| date | BINARY (UTF8) | Compliant with the [FHIR date format](https://hl7.org/fhir/R4/datatypes.html#date) | -| decimal | DECIMAL(32,6) | See [Decimal type](#decimal-type) | -| id | BINARY (UTF8) | See [ID type](#id-type) | -| instant | INT96 | | -| integer | INT32 | Compliant with the [FHIR integer format](https://hl7.org/fhir/R4/datatypes.html#integer) | -| markdown | BINARY (UTF8) | Compliant with the [FHIR markdown format](https://hl7.org/fhir/R4/datatypes.html#markdown) | -| oid | BINARY (UTF8) | Compliant with the [FHIR oid format](https://hl7.org/fhir/R4/datatypes.html#oid) | -| positiveInt | INT32 | Compliant with the [FHIR positiveInt format](https://hl7.org/fhir/R4/datatypes.html#positiveInt) | -| string | BINARY (UTF8) | Compliant with the [FHIR string format](https://hl7.org/fhir/R4/datatypes.html#string) | -| time | BINARY (UTF8) | Compliant with the [FHIR time format](https://hl7.org/fhir/R4/datatypes.html#time) | -| unsignedInt | INT32 | Compliant with the [FHIR unsignedInt format](https://hl7.org/fhir/R4/datatypes.html#unsignedInt) | -| uri | BINARY (UTF8) | Compliant with the [FHIR uri format](https://hl7.org/fhir/R4/datatypes.html#uri) | -| url | BINARY (UTF8) | Compliant with the [FHIR url format](https://hl7.org/fhir/R4/datatypes.html#url) | -| uuid | BINARY (UTF8) 
| Compliant with the [FHIR uuid format](https://hl7.org/fhir/R4/datatypes.html#uuid) | +| FHIR type | Parquet type | Additional requirements | +|-------------|---------------|--------------------------------------------------------------------------------------------------| +| boolean | BOOLEAN | | +| canonical | BINARY (UTF8) | Compliant with the [FHIR canonical format](https://hl7.org/fhir/R4/datatypes.html#canonical) | +| code | BINARY (UTF8) | Compliant with the [FHIR code format](https://hl7.org/fhir/R4/datatypes.html#code) | +| dateTime | BINARY (UTF8) | Compliant with the [FHIR dateTime format](https://hl7.org/fhir/R4/datatypes.html#dateTime) | +| date | BINARY (UTF8) | Compliant with the [FHIR date format](https://hl7.org/fhir/R4/datatypes.html#date) | +| decimal | DECIMAL(32,6) | See [Decimal type](#decimal-type) | +| id | BINARY (UTF8) | See [ID type](#id-type) | +| instant | INT96 | | +| integer | INT32 | Compliant with the [FHIR integer format](https://hl7.org/fhir/R4/datatypes.html#integer) | +| markdown | BINARY (UTF8) | Compliant with the [FHIR markdown format](https://hl7.org/fhir/R4/datatypes.html#markdown) | +| oid | BINARY (UTF8) | Compliant with the [FHIR oid format](https://hl7.org/fhir/R4/datatypes.html#oid) | +| positiveInt | INT32 | Compliant with the [FHIR positiveInt format](https://hl7.org/fhir/R4/datatypes.html#positiveInt) | +| string | BINARY (UTF8) | Compliant with the [FHIR string format](https://hl7.org/fhir/R4/datatypes.html#string) | +| time | BINARY (UTF8) | Compliant with the [FHIR time format](https://hl7.org/fhir/R4/datatypes.html#time) | +| unsignedInt | INT32 | Compliant with the [FHIR unsignedInt format](https://hl7.org/fhir/R4/datatypes.html#unsignedInt) | +| uri | BINARY (UTF8) | Compliant with the [FHIR uri format](https://hl7.org/fhir/R4/datatypes.html#uri) | +| url | BINARY (UTF8) | Compliant with the [FHIR url format](https://hl7.org/fhir/R4/datatypes.html#url) | +| uuid | BINARY (UTF8) | Compliant with the [FHIR uuid 
format](https://hl7.org/fhir/R4/datatypes.html#uuid) | ### Complex and backbone elements From 6b638f4075ede9ebf69e01704ad517dc6fecfe2d Mon Sep 17 00:00:00 2001 From: John Grimes Date: Sun, 17 Mar 2024 18:02:44 +1000 Subject: [PATCH 010/175] Add intro video embed to docs index page --- site/docs/index.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/site/docs/index.md b/site/docs/index.md index e4c19df380..fc1c448bf1 100644 --- a/site/docs/index.md +++ b/site/docs/index.md @@ -9,13 +9,7 @@ use [FHIR®](https://hl7.org/fhir) and clinical terminology within health data analytics. It is built on [Apache Spark](https://spark.apache.org), and includes both language libraries and a server implementation. -import Components from '@site/src/images/components.png'; -import Components2x from '@site/src/images/components@2x.png'; -import ComponentsDark from '@site/src/images/components-dark.png'; -import ComponentsDark2x from '@site/src/images/components-dark@2x.png'; - - - + ## What can it do? @@ -73,6 +67,14 @@ aggregate, group and transform FHIR data. See [Server](/docs/server) for more information. 
+import Components from '@site/src/images/components.png'; +import Components2x from '@site/src/images/components@2x.png'; +import ComponentsDark from '@site/src/images/components-dark.png'; +import ComponentsDark2x from '@site/src/images/components-dark@2x.png'; + + + + ## Licensing and attribution Pathling is a product of the From 968be82e6c6713cf1eccf313d6a77956d7606c2b Mon Sep 17 00:00:00 2001 From: John Grimes Date: Sun, 17 Mar 2024 19:22:13 +1000 Subject: [PATCH 011/175] Fix width of video on mobile --- site/docs/index.md | 7 ++++++- site/src/css/custom.css | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/site/docs/index.md b/site/docs/index.md index fc1c448bf1..65257546ea 100644 --- a/site/docs/index.md +++ b/site/docs/index.md @@ -9,7 +9,12 @@ use [FHIR®](https://hl7.org/fhir) and clinical terminology within health data analytics. It is built on [Apache Spark](https://spark.apache.org), and includes both language libraries and a server implementation. - +