Skip to content

Commit

Permalink
feat!: rejigger resource/stratifier info for non-cubed tables
Browse files Browse the repository at this point in the history
For never-cubed tables (resources_per_pt and all quality summaries),
we previously put a not-very-human-friendly identifier like
observation_valuecodeableconcept in the first column, named "id".

Now, we always name the first column "resource", add a second column
with the specific metric's stratifier, and keep the original casing of
both values instead of lowercasing them.

id: observation_valuecodeableconcept
-> becomes ->
resource: Observation
field: valueCodeableConcept

All such tables now have two columns, even if a stratifier isn't used.
In that case, we call the second column "stratifier" and leave it as
all NULL.
  • Loading branch information
mikix committed Jun 20, 2024
1 parent 99a4a1b commit 98ccf40
Show file tree
Hide file tree
Showing 18 changed files with 132 additions and 118 deletions.
2 changes: 1 addition & 1 deletion cumulus_library_data_metrics/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Data Metrics study for Cumulus Library"""

__version__ = "3.0.0"
__version__ = "4.0.0"
29 changes: 27 additions & 2 deletions cumulus_library_data_metrics/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,34 @@ def __init__(self):
# some of them are not present (notably DocRef.context.period.start).
self.date_fields = copy.deepcopy(resource_info.DATES)

def make_summary(self) -> None:
def make_table_fragment(self, src: str, stratifier: str | None = None):
key = src.lower()
if stratifier:
key += f"_{stratifier.lower().replace(' ', '_')}"
return key

def add_summary_entry(
self, src: str, stratifier: str | None = None, *, denominator: str | None = None
) -> None:
# These are all flags for the summary-table-builder jinja.
key = self.make_table_fragment(src, stratifier)
self.summary_entries[key] = {
"src": src,
"stratifier": stratifier,
"denominator": denominator,
}

def make_summary(self, stratifier_column: str | None = None) -> None:
"""Makes a summary table, from all the individual metric tables"""
sql = self.render_sql("../base.summary", entries=self.summary_entries, metric=self.name)
# Always define *something* even if we don't use it, so that consuming visualizations
# can assume a consistent two-column definition of resource + stratifier.
stratifier_column = stratifier_column or "stratifier"
sql = self.render_sql(
"../base.summary",
entries=self.summary_entries,
stratifier_column=stratifier_column,
metric=self.name,
)
self.queries.append(sql)

def _check_for_deep_docref_date(self, field: str, fields_to_check: dict) -> bool:
Expand Down
17 changes: 11 additions & 6 deletions cumulus_library_data_metrics/base.summary.jinja
Original file line number Diff line number Diff line change
@@ -1,25 +1,30 @@
CREATE TABLE data_metrics__{{ metric }}_summary AS (
WITH
{%- for entry_key, denominator_sql in entries.items() %}
{%- for entry_key, summary_info in entries.items() %}
{{ entry_key }}_numerator AS (
SELECT '{{ entry_key }}' AS entry_key, COUNT(id) AS numerator
FROM data_metrics__{{ metric }}_{{ entry_key }}
),
{{ entry_key }}_denominator AS (
{% if denominator_sql and denominator_sql.strip() %}
WITH denominator_slice AS {{ denominator_sql }}
{% if summary_info["denominator"] and summary_info["denominator"].strip() %}
WITH denominator_slice AS {{ summary_info["denominator"] }}
{% else %}
WITH denominator_slice AS (SELECT id from {{ entry_key.split("_")[0] }})
WITH denominator_slice AS (SELECT id from {{ summary_info["src"] }})
{% endif %}
SELECT '{{ entry_key }}' AS entry_key, COUNT(id) AS denominator
FROM denominator_slice
),
{%- endfor %}

union_table AS (
{%- for entry_key in entries %}
{%- for entry_key, summary_info in entries.items() %}
SELECT
numerator_table.entry_key AS id,
'{{ summary_info["src"] }}' AS resource,
{% if summary_info["stratifier"] %}
'{{ summary_info["stratifier"] }}' AS {{ stratifier_column }},
{% else %}
NULL AS {{ stratifier_column }},
{% endif %}
numerator,
denominator,
CAST(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ summed_counts AS (
)

SELECT
'cumulus__all' AS id,
'cumulus__all' AS resource,
'cumulus__all' AS category,
CAST(AVG(num_resources) AS DECIMAL(18, 2)) AS average,
CAST(STDDEV_POP(num_resources) AS DECIMAL(18, 2)) AS std_dev,
Expand All @@ -74,7 +74,7 @@ FROM summed_counts
UNION
{%- for resource in patient_fields %}
SELECT
'{{ resource }}' AS id,
'{{ resource }}' AS resource,
'cumulus__all' AS category,
CAST(AVG(num_resources) AS DECIMAL(18, 2)) AS average,
CAST(STDDEV_POP(num_resources) AS DECIMAL(18, 2)) AS std_dev,
Expand All @@ -84,7 +84,7 @@ WHERE resource = '{{ resource }}'
{% if resource in categories %}
UNION
SELECT
'{{ resource }}' AS id,
'{{ resource }}' AS resource,
cat_values.category,
CAST(AVG(COALESCE(num_resources, 0)) AS DECIMAL(18, 2)) AS average,
CAST(STDDEV_POP(COALESCE(num_resources, 0)) AS DECIMAL(18, 2)) as std_dev,
Expand Down
4 changes: 1 addition & 3 deletions cumulus_library_data_metrics/q_date_recent/q_date_recent.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@ class DateRecentBuilder(MetricMixin, BaseTableBuilder):

def make_table(self, **kwargs) -> None:
"""Make a single metric table"""
summary_key = kwargs["src"].lower()
self.summary_entries[summary_key] = None

self.add_summary_entry(kwargs["src"])
self.queries.append(self.render_sql(self.name, **kwargs))

def add_metric_queries(self) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@ class TargetPopBuilder(MetricMixin, BaseTableBuilder):

def make_table(self, **kwargs) -> str:
"""Make a single metric table"""
summary_key = f"{kwargs['src'].lower()}_{kwargs['dest'].lower()}"
self.summary_entries[summary_key] = None

self.add_summary_entry(kwargs["src"], kwargs["dest"])
self.queries.append(self.render_sql(self.name, **kwargs))

def add_metric_queries(self) -> None:
Expand All @@ -26,4 +24,4 @@ def add_metric_queries(self) -> None:
self.make_table(src="MedicationRequest", dest="Patient", field="subject")
self.make_table(src="Observation", dest="Patient", field="subject")
self.make_table(src="Procedure", dest="Patient", field="subject")
self.make_summary()
self.make_summary(stratifier_column="target")
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@ class TargetValidBuilder(MetricMixin, BaseTableBuilder):

def make_table(self, **kwargs) -> None:
"""Make a single metric table"""
summary_key = f"{kwargs['src'].lower()}_{kwargs['dest'].lower()}"
self.summary_entries[summary_key] = None

self.add_summary_entry(kwargs["src"], kwargs["dest"])
self.queries.append(self.render_sql(self.name, **kwargs))

def add_metric_queries(self) -> None:
Expand All @@ -49,4 +47,4 @@ def add_metric_queries(self) -> None:
self.make_table(src="Observation", dest="Encounter", field="encounter")
self.make_table(src="Procedure", dest="Patient", field="subject")
self.make_table(src="Procedure", dest="Encounter", field="encounter")
self.make_summary()
self.make_summary(stratifier_column="target")
6 changes: 2 additions & 4 deletions cumulus_library_data_metrics/q_system_use/q_system_use.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@ class SystemUseBuilder(MetricMixin, BaseTableBuilder):

def make_table(self, **kwargs) -> None:
"""Make a single metric table"""
summary_key = f"{kwargs['src'].lower()}_{kwargs['field'].lower()}"
self.summary_entries[summary_key] = None

self.add_summary_entry(kwargs["src"], kwargs["field"])
self.queries.append(self.render_sql(self.name, **kwargs))

def add_metric_queries(self) -> None:
Expand Down Expand Up @@ -75,4 +73,4 @@ def add_metric_queries(self) -> None:
"http://www.cms.gov/Medicare/Coding/ICD10",
],
)
self.make_summary()
self.make_summary(stratifier_column="field")
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,13 @@ class ValidUsCoreV4Builder(UsCoreV4Mixin, BaseTableBuilder):

def make_table(self, **kwargs) -> None:
"""Make a single metric table"""
profile_name = self.get_profile_name(kwargs)
self.summary_entries[profile_name] = self.render_sql("../us_core_v4/slice", **kwargs)
self.add_summary_entry(
kwargs["src"],
kwargs.get("name"),
denominator=self.render_sql("../us_core_v4/slice", **kwargs),
)
self.queries.append(self.render_sql(self.name, **kwargs))

def add_metric_queries(self) -> None:
super().add_metric_queries()
self.make_summary()
self.make_summary(stratifier_column="profile")
6 changes: 1 addition & 5 deletions cumulus_library_data_metrics/t_us_core_v4/t_us_core_v4.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Fake/test metric for us_core_v4 profiles"""

import jinja2
from cumulus_library.base_table_builder import BaseTableBuilder

from cumulus_library_data_metrics.us_core_v4 import UsCoreV4Mixin
Expand All @@ -14,7 +13,4 @@ def make_table(self, **kwargs) -> None:
# Keep these separate to make it easier to tell the many valid_* fields apart in the
# "expected" csv files.
self.queries.append(self.render_sql("mandatory", **kwargs))
try:
self.queries.append(self.render_sql("must_support", **kwargs))
except jinja2.exceptions.TemplateNotFound:
pass # remove this try/except once we have must-support enabled for all profiles
self.queries.append(self.render_sql("must_support", **kwargs))
21 changes: 7 additions & 14 deletions cumulus_library_data_metrics/us_core_v4/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,9 @@ class UsCoreV4Mixin(MetricMixin):
},
}

@staticmethod
def get_profile_name(kwargs: dict[str, str]) -> str:
profile_name = kwargs["src"].lower()
if "name" in kwargs:
profile_name += f"_{kwargs['name']}"
return profile_name

def render_sql(self, template: str, **kwargs) -> str:
if "src" in kwargs:
kwargs["profile_name"] = self.get_profile_name(kwargs)
if src := kwargs.get("src"):
kwargs["profile_name"] = self.make_table_fragment(src, kwargs.get("name"))
return super().render_sql(template, **kwargs)

def make_table(self, **kwargs) -> None:
Expand All @@ -102,20 +95,20 @@ def add_metric_queries(self) -> None:
# So we run these first,
# self.make_table(src="Observation", name="blood_pressure", loinc="85354-9")
self.make_table(
src="Observation", name="laboratory", category="laboratory", mandatory_split=2
src="Observation", name="Laboratory", category="laboratory", mandatory_split=2
)
self.make_table(
src="Observation", name="smoking_status", loinc="72166-2", mandatory_split=2
src="Observation", name="Smoking Status", loinc="72166-2", mandatory_split=2
)
self.make_table(
src="Observation", name="vital_signs", category="vital-signs", mandatory_split=3
src="Observation", name="Vital Signs", category="vital-signs", mandatory_split=3
)

# Rest of profiles
self.make_table(src="AllergyIntolerance")
self.make_table(src="Condition")
self.make_table(src="DiagnosticReport", name="lab")
self.make_table(src="DiagnosticReport", name="note")
self.make_table(src="DiagnosticReport", name="Lab")
self.make_table(src="DiagnosticReport", name="Note")
self.make_table(src="DocumentReference")
self.make_table(src="Encounter")
self.make_table(src="Immunization")
Expand Down
2 changes: 1 addition & 1 deletion cumulus_library_data_metrics/us_core_v4/slice.jinja
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{% if src == "DiagnosticReport" %}
{% import 'us_core_v4/diagnosticreport_utils.jinja' as dr_utils %}
{% if name == "lab" %}
{% if name == "Lab" %}
{{ dr_utils.extract_lab_slice(src) }}
{% else %}
{{ dr_utils.extract_non_lab_slice(src) }}
Expand Down
2 changes: 1 addition & 1 deletion tests/data/c_resources_per_pt/general/expected_summary.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
id,category,average,std_dev,max
resource,category,average,std_dev,max
cumulus__all,cumulus__all,3.00,0.82,4
ServiceRequest,cumulus__all,0.00,0.00,0
Procedure,cumulus__all,0.00,0.00,0
Expand Down
20 changes: 10 additions & 10 deletions tests/data/q_date_recent/general/expected_summary.csv
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
id,numerator,denominator,percentage
procedure,2,5,40.00
observation,0,0,0.00
medicationrequest,0,0,0.00
immunization,0,0,0.00
encounter,1,4,25.00
documentreference,0,0,0.00
diagnosticreport,0,0,0.00
condition,2,4,50.00
allergyintolerance,0,0,0.00
resource,stratifier,numerator,denominator,percentage
Procedure,,2,5,40.00
Observation,,0,0,0.00
MedicationRequest,,0,0,0.00
Immunization,,0,0,0.00
Encounter,,1,4,25.00
DocumentReference,,0,0,0.00
DiagnosticReport,,0,0,0.00
Condition,,2,4,50.00
AllergyIntolerance,,0,0,0.00
22 changes: 11 additions & 11 deletions tests/data/q_ref_target_pop/general/expected_summary.csv
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
id,numerator,denominator,percentage
resource,target,numerator,denominator,percentage
# Various edge cases here in procedure
procedure_patient,1,4,25.00
Procedure,Patient,1,4,25.00
# Rest are just short happy-path checks to confirm that we look at the right json field
observation_patient,0,1,0.00
medicationrequest_patient,0,1,0.00
immunization_patient,0,1,0.00
encounter_patient,0,1,0.00
documentreference_patient,0,1,0.00
diagnosticreport_patient,0,1,0.00
device_patient,0,1,0.00
condition_patient,0,1,0.00
allergyintolerance_patient,0,1,0.00
Observation,Patient,0,1,0.00
MedicationRequest,Patient,0,1,0.00
Immunization,Patient,0,1,0.00
Encounter,Patient,0,1,0.00
DocumentReference,Patient,0,1,0.00
DiagnosticReport,Patient,0,1,0.00
Device,Patient,0,1,0.00
Condition,Patient,0,1,0.00
AllergyIntolerance,Patient,0,1,0.00
38 changes: 19 additions & 19 deletions tests/data/q_ref_target_valid/general/expected_summary.csv
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
id,numerator,denominator,percentage
resource,target,numerator,denominator,percentage
# Various edge cases here in procedure
procedure_patient,1,9,11.11
procedure_encounter,1,9,11.11
Procedure,Patient,1,9,11.11
Procedure,Encounter,1,9,11.11
# Rest are just short happy-path checks to confirm that we look at the right json field
observation_patient,0,1,0.00
observation_encounter,0,1,0.00
medicationrequest_patient,0,1,0.00
medicationrequest_encounter,0,1,0.00
immunization_patient,0,1,0.00
immunization_encounter,0,1,0.00
encounter_patient,0,1,0.00
Observation,Patient,0,1,0.00
Observation,Encounter,0,1,0.00
MedicationRequest,Patient,0,1,0.00
MedicationRequest,Encounter,0,1,0.00
Immunization,Patient,0,1,0.00
Immunization,Encounter,0,1,0.00
Encounter,Patient,0,1,0.00
# Except DocRefs also have some extra cases around encounter array support
documentreference_patient,0,6,0.00
documentreference_encounter,3,6,50.00
diagnosticreport_patient,0,1,0.00
diagnosticreport_encounter,0,1,0.00
device_patient,0,1,0.00
condition_patient,0,1,0.00
condition_encounter,0,1,0.00
allergyintolerance_patient,0,1,0.00
allergyintolerance_encounter,0,1,0.00
DocumentReference,Patient,0,6,0.00
DocumentReference,Encounter,3,6,50.00
DiagnosticReport,Patient,0,1,0.00
DiagnosticReport,Encounter,0,1,0.00
Device,Patient,0,1,0.00
Condition,Patient,0,1,0.00
Condition,Encounter,0,1,0.00
AllergyIntolerance,Patient,0,1,0.00
AllergyIntolerance,Encounter,0,1,0.00
24 changes: 12 additions & 12 deletions tests/data/q_system_use/general/expected_summary.csv
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
id,numerator,denominator,percentage
resource,field,numerator,denominator,percentage
# Various edge cases here in procedure
procedure_code,4,12,33.33
Procedure,code,4,12,33.33
# Rest are often short happy-path checks to confirm that we look at the right json field
observation_valuecodeableconcept,0,2,0.00
observation_code,0,2,0.00
medicationrequest_medicationcodeableconcept,0,1,0.00
medication_code,0,1,0.00
immunization_vaccinecode,0,1,0.00
documentreference_type,1,3,33.33
diagnosticreport_code,0,1,0.00
device_type,0,1,0.00
condition_code,0,1,0.00
allergyintolerance_code,0,1,0.00
Observation,valueCodeableConcept,0,2,0.00
Observation,code,0,2,0.00
MedicationRequest,medicationCodeableConcept,0,1,0.00
Medication,code,0,1,0.00
Immunization,vaccineCode,0,1,0.00
DocumentReference,type,1,3,33.33
DiagnosticReport,code,0,1,0.00
Device,type,0,1,0.00
Condition,code,0,1,0.00
AllergyIntolerance,code,0,1,0.00
Loading

0 comments on commit 98ccf40

Please sign in to comment.