Skip to content

Commit

Permalink
Merge pull request #181 from samply/develop
Browse files Browse the repository at this point in the history
SQL allowlisted queries
  • Loading branch information
enola-dkfz authored Nov 4, 2024
2 parents 740d427 + 3e7471d commit 775d96c
Show file tree
Hide file tree
Showing 10 changed files with 283 additions and 14 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# Samply.Focus v0.8.0 2024-11-04

In this release, we are supporting 4 types of SQL queries for Exliquid and Organoids

## Major changes
* Allowlist of SQL queries


# Samply.Focus v0.7.0 2024-09-24

In this release, we are extending the supported data backends beyond CQL-enabled FHIR stores. We now support PostgreSQL as well. Usage instructions are included in the Readme.
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "focus"
version = "0.7.0"
version = "0.8.0"
edition = "2021"
license = "Apache-2.0"

Expand Down
43 changes: 43 additions & 0 deletions resources/sql/EXLIQUID_SAMPLE_3LEVELS
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
Exliquid query for sites with 'legacy' exliquid specimen documentation (3 level hierarchy versus 'virtual' mother sample).
For current expected documentation see: https://wiki.verbis.dkfz.de/pages/viewpage.action?pageId=294716167.
*/
with t as (
select
(s.resource ->> 'id')::text s_id,
(s_coding ->> 'code')::text sample_type
from specimen s, jsonb_array_elements(s.resource -> 'type' -> 'coding') as s_coding
where s_coding ->> 'system' = 'https://fhir.bbmri.de/CodeSystem/SampleMaterialType'
),
t2 as (
SELECT
s_ali.resource ->> 'id' s_ali_id,
sample_type_ali.sample_type as s_ali_type,
(s_ali.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_ali_amountrest,
s_ali_grp.resource ->> 'id' s_ali_grp_id,
sample_type_ali_grp.sample_type as s_ali_grp_type,
(s_ali_grp.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_ali_grp_amountrest,
s_mother.resource ->> 'id' s_mother_id,
sample_type_mother.sample_type as s_mother_type,
(s_mother.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_mother_amountrest,
s_mother.resource -> 'subject' ->> 'reference' as patient_id
FROM specimen s_ali
JOIN specimen s_ali_grp ON (s_ali.resource->'parent'->0->>'reference')::text = (s_ali_grp.resource->>'resourceType')::text || '/' || (s_ali_grp.resource->>'id')::text
JOIN specimen s_mother ON (s_ali_grp.resource->'parent'->0->>'reference')::text = (s_mother.resource->>'resourceType')::text || '/' || (s_mother.resource->>'id')::text
join t as sample_type_ali on s_ali.resource ->> 'id' = sample_type_ali.s_id
join t as sample_type_ali_grp on s_ali_grp.resource ->> 'id' = sample_type_ali_grp.s_id
join t as sample_type_mother on s_mother.resource ->> 'id' = sample_type_mother.s_id
where (s_ali.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float > 0
),
t3 as (
select distinct
t2.patient_id,
c.resource -> 'code' -> 'coding' -> 0 ->> 'code' icd10_code,
c.resource -> 'code' ->> 'text' diag_desc,
t2.s_mother_type
from t2
join condition c on t2.patient_id = c.resource -> 'subject' ->> 'reference'
)
select icd10_code, diag_desc, count(distinct patient_id) patient_count, s_mother_type, count(s_mother_type) sample_count
from t3
group by icd10_code, diag_desc, patient_id, s_mother_type;
78 changes: 78 additions & 0 deletions resources/sql/SIORGP_PUBLIC_MAIN
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
SIorgP MetPredict project
The approach chosen here is to minimize the number of tasks generated and thus network traffic via Beam
=> one large query that returns the most necessary fields over multiple smaller queries
*/
with t as (
select
o.resource->'subject'->>'reference' as pat_ref,
o.resource->'code'->'coding'->0->>'code' as crf,
component->'code'->'coding'->0->>'code' AS code,
COALESCE(
component->'valueCodeableConcept'->'coding'->0->>'code',
component->>'valueDateTime',
component->'valueQuantity'->>'value',
component->>'valueString'
) AS value
FROM
observation o ,
jsonb_array_elements(o.resource->'component') AS component
where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP%'
),
t2 AS (
select t.value as pat_pseudonym,
-- t.crf,
p.resource->>'gender' as gender,
p.resource->>'birthDate' as birth_date,
t5.value as organoid_id,
t2.value as location_primary_tumor,
t7.value as location_primary_tumor_precise,
t3.value as therapy,
t4.value as metastases_therapy,
t6.value::integer as age_at_enrollment
from t
left join t t2 on t.pat_ref = t2.pat_ref and t2.code='SIOP_LOCALISATION_PRIMARY_TUMOR'
left join t t3 on t.pat_ref = t3.pat_ref and t3.code='SIOP_NEOADJ_T_RECTAL_CARCINOMA'
left join t t4 on t.pat_ref = t4.pat_ref and t4.code='SIOP_NEOADJ_CTX_MET'
left join t t5 on t.pat_ref = t5.pat_ref and t5.code like 'SIOP_SAMPLE_M0%_PSEUDONYM'
left join t t6 on t.pat_ref = t6.pat_ref and t6.code='SIOP_AGE_STUDY_ENROLLMENT'
left join t t7 on t.pat_ref = t7.pat_ref and t7.code='SIOP_LOCALISATION_PRIMARY_TUMOR_COLON'
left join patient p on t.pat_ref = 'Patient/' || (p.resource->>'id')::text
where t.crf like 'SIOrgP - MetPredict - Visite 1%' and t.code = 'SIOP_PATIENT_PSEUDONYM'
),
t8 as (
select pat_pseudonym, count(distinct organoid_id) n_organoids
from t2
group by pat_pseudonym
)
-- patients having <= 3 organoids
select 'MetPredict' as project, 'pat_pdos_leq_3' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids <= 3) as value
union
-- patients having 4 organoids
select 'MetPredict' as project, 'pat_pdos_4' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids = 4) as value
union
-- patients having 5 organoids
select 'MetPredict' as project, 'pat_pdos_5' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids = 5) as value
union
-- patients having > 5 organoids
select 'MetPredict' as project, 'pat_pdos_gt_5' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids > 5) as value
union
-- the total number of patients
select 'MetPredict' as project, 'n_patients' as field, (select count(distinct pat_pseudonym) from t2) as value
union
-- the total number of organoids
select 'MetPredict' as project, 'n_organoids' as field, (select count(distinct organoid_id) from t2) as value
union
select 'MetPredict' as project, 'gender_male' as field, (select count(distinct pat_pseudonym) from t2 where gender = 'male') as value
union
select 'MetPredict' as project, 'gender_female' as field, (select count(distinct pat_pseudonym) from t2 where gender = 'female') as value
union
select 'MetPredict' as project, '<=30' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment <= 30) as value
union
select 'MetPredict' as project, '31-40' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 31 and age_at_enrollment <= 40) as value
union
select 'MetPredict' as project, '41-50' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 41 and age_at_enrollment <= 50) as value
union
select 'MetPredict' as project, '51-60' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 51 and age_at_enrollment <= 60) as value
union
select 'MetPredict' as project, '>=61' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 61) as value;
5 changes: 5 additions & 0 deletions resources/sql/SIORGP_PUBLIC_NPAT
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Test query. Number of patients that have a documented visit 1
select count(distinct p.resource)
from observation o
join patient p on o.resource->'subject'->>'reference' = 'Patient/' || (p.resource->>'id')::text
where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP - MetPredict - Visite 1%';
4 changes: 4 additions & 0 deletions resources/sql/SIORGP_PUBLIC_NVISIT2B
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Test query: Number of observations for visit 2b as a lower bound for the number of expected organoids
select count(o)
from observation o
where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP - MetPredict - Visite 2b%';
89 changes: 89 additions & 0 deletions resources/test/result_current.cql
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
library Retrieve
using FHIR version '4.0.0'
include FHIRHelpers version '4.0.0'

codesystem icd10: 'http://hl7.org/fhir/sid/icd-10'
codesystem SampleMaterialType: 'https://fhir.bbmri.de/CodeSystem/SampleMaterialType'
codesystem icd10gm: 'http://fhir.de/CodeSystem/dimdi/icd-10-gm'
codesystem icd10gmnew: 'http://fhir.de/CodeSystem/bfarm/icd-10-gm'
codesystem StorageTemperature: 'https://fhir.bbmri.de/CodeSystem/StorageTemperature'


context Patient

define AgeClass:
if (Patient.birthDate is null) then 'unknown' else ToString((AgeInYears() div 10) * 10)

define Gender:
if (Patient.gender is null) then 'unknown' else Patient.gender

define Custodian:
First(from Specimen.extension E
where E.url = 'https://fhir.bbmri.de/StructureDefinition/Custodian'
return (E.value as Reference).identifier.value)

define function SampleType(specimen FHIR.Specimen):
case FHIRHelpers.ToCode(specimen.type.coding.where(system = 'https://fhir.bbmri.de/CodeSystem/SampleMaterialType').first())
when Code 'plasma-edta' from SampleMaterialType then 'blood-plasma'
when Code 'plasma-citrat' from SampleMaterialType then 'blood-plasma'
when Code 'plasma-heparin' from SampleMaterialType then 'blood-plasma'
when Code 'plasma-cell-free' from SampleMaterialType then 'blood-plasma'
when Code 'plasma-other' from SampleMaterialType then 'blood-plasma'
when Code 'plasma' from SampleMaterialType then 'blood-plasma'
when Code 'tissue-formalin' from SampleMaterialType then 'tissue-ffpe'
when Code 'tumor-tissue-ffpe' from SampleMaterialType then 'tissue-ffpe'
when Code 'normal-tissue-ffpe' from SampleMaterialType then 'tissue-ffpe'
when Code 'other-tissue-ffpe' from SampleMaterialType then 'tissue-ffpe'
when Code 'tumor-tissue-frozen' from SampleMaterialType then 'tissue-frozen'
when Code 'normal-tissue-frozen' from SampleMaterialType then 'tissue-frozen'
when Code 'other-tissue-frozen' from SampleMaterialType then 'tissue-frozen'
when Code 'tissue-paxgene-or-else' from SampleMaterialType then 'tissue-other'
when Code 'derivative' from SampleMaterialType then 'derivative-other'
when Code 'liquid' from SampleMaterialType then 'liquid-other'
when Code 'tissue' from SampleMaterialType then 'tissue-other'
when Code 'serum' from SampleMaterialType then 'blood-serum'
when Code 'cf-dna' from SampleMaterialType then 'dna'
when Code 'g-dna' from SampleMaterialType then 'dna'
when Code 'blood-plasma' from SampleMaterialType then 'blood-plasma'
when Code 'tissue-ffpe' from SampleMaterialType then 'tissue-ffpe'
when Code 'tissue-frozen' from SampleMaterialType then 'tissue-frozen'
when Code 'tissue-other' from SampleMaterialType then 'tissue-other'
when Code 'derivative-other' from SampleMaterialType then 'derivative-other'
when Code 'liquid-other' from SampleMaterialType then 'liquid-other'
when Code 'blood-serum' from SampleMaterialType then 'blood-serum'
when Code 'dna' from SampleMaterialType then 'dna'
when Code 'buffy-coat' from SampleMaterialType then 'buffy-coat'
when Code 'urine' from SampleMaterialType then 'urine'
when Code 'ascites' from SampleMaterialType then 'ascites'
when Code 'saliva' from SampleMaterialType then 'saliva'
when Code 'csf-liquor' from SampleMaterialType then 'csf-liquor'
when Code 'bone-marrow' from SampleMaterialType then 'bone-marrow'
when Code 'peripheral-blood-cells-vital' from SampleMaterialType then 'peripheral-blood-cells-vital'
when Code 'stool-faeces' from SampleMaterialType then 'stool-faeces'
when Code 'rna' from SampleMaterialType then 'rna'
when Code 'whole-blood' from SampleMaterialType then 'whole-blood'
when Code 'swab' from SampleMaterialType then 'swab'
when Code 'dried-whole-blood' from SampleMaterialType then 'dried-whole-blood'
when null then 'Unknown'
else 'Unknown'
end
define Specimen:
if InInitialPopulation then [Specimen] S where (((((FHIRHelpers.ToDateTime(S.collection.collected) between @1900-01-01 and @2024-10-25) )) and (((((S.extension.where(url='https://fhir.bbmri.de/StructureDefinition/StorageTemperature').value.coding.code contains 'temperature2to10'))))))) else {} as List<Specimen>

define Diagnosis:
if InInitialPopulation then [Condition] else {} as List<Condition>

define function DiagnosisCode(condition FHIR.Condition):
condition.code.coding.where(system = 'http://fhir.de/CodeSystem/bfarm/icd-10-gm').code.first()

define function DiagnosisCode(condition FHIR.Condition, specimen FHIR.Specimen):
Coalesce(
condition.code.coding.where(system = 'http://hl7.org/fhir/sid/icd-10').code.first(),
condition.code.coding.where(system = 'http://fhir.de/CodeSystem/dimdi/icd-10-gm').code.first(),
condition.code.coding.where(system = 'http://fhir.de/CodeSystem/bfarm/icd-10-gm').code.first(),
specimen.extension.where(url='https://fhir.bbmri.de/StructureDefinition/SampleDiagnosis').value.coding.code.first()
)

define InInitialPopulation:
((((((Patient.gender = 'male')))) and ((((((exists[Condition: Code 'C61' from icd10]) or (exists[Condition: Code 'C61' from icd10gm]) or (exists[Condition: Code 'C61' from icd10gmnew])) or (exists from [Specimen] S where (S.extension.where(url='https://fhir.bbmri.de/StructureDefinition/SampleDiagnosis').value.coding.code contains 'C61')))))) and (( AgeInYears() between Ceiling(10) and Ceiling(90)))) or (((exists from [Specimen] S
where FHIRHelpers.ToDateTime(S.collection.collected) between @1900-01-01 and @2024-10-25 )) and ((((exists from [Specimen] S where (S.extension.where(url='https://fhir.bbmri.de/StructureDefinition/StorageTemperature').value.coding contains Code 'temperature2to10' from StorageTemperature) ))))))
Loading

0 comments on commit 775d96c

Please sign in to comment.