steno-aarhus · lwjohnst86 · Jun 19, 2024 · May 16, 2024 · May 16, 2024 · May 16, 2024
@@ -0,0 +1,3 @@
+name,logic
+hba1c,(analysiscode == 'NPU27300' AND value >= 48) OR (analysiscode == 'NPU03835' AND value >= 6.5)
+
@@ -8,5 +8,6 @@
 
 library(testthat)
 library(osdc)
+library(dplyr)
 
 test_check("osdc")
@@ -0,0 +1,77 @@
+# Synthetic lab results used as input to `include_hba1c()`. The comments below
+# call NPU03835 the "old" units and NPU27300 the "new" units; the thresholds
+# exercised are `value >= 6.5` for NPU03835 and `value >= 48` for NPU27300.
+lab_forsker <- tibble::tribble(
+  ~patient_cpr, ~samplingdate, ~analysiscode, ~value,
+  # A single result per row, above and below each threshold.
+  "498718589800", "20230101", "NPU27300", 49,
+  "498718589801", "20230101", "NPU03835", 6.6,
+  "498718589802", "20230101", "NPU03835", 6.3,
+  "498718589803", "20230101", "NPU27300", 47,
+  # Duplicate patient_cpr on other dates: one result in the new units and one
+  # in the old units, both meeting their threshold (6.5 is exactly on it).
+  "498718589803", "20210101", "NPU27300", 49,
+  "498718589803", "20220101", "NPU03835", 6.5,
+  # Exactly on the new-unit threshold, which is inclusive (`>= 48`).
+  "498718589804", "20230101", "NPU27300", 48,
+  # Duplicate patient_cpr when old and new units are the same date.
+  "498718589805", "20000101", "NPU03835", 6.5,
+  "498718589805", "20000101", "NPU27300", 49,
+  # Duplicate but with old below threshold and new above it.
+  "498718589806", "20000101", "NPU03835", 6.3,
+  "498718589806", "20000101", "NPU27300", 49,
+  # Duplicate but with new below threshold and old above it.
+  "498718589807", "20200101", "NPU03835", 6.6,
+  "498718589807", "20200101", "NPU27300", 47,
+  # Analysis codes not covered by the inclusion logic, with a high and a low
+  # value.
+  "498718589808", "20220101", "NPU00000", 100,
+  "498718589809", "20220101", "NPU00000", 5
+)
+
+# Rows expected back from `include_hba1c(lab_forsker)`: one row per patient and
+# sampling date that has at least one result meeting its threshold. Two
+# qualifying results on the same date give a single row.
+expected <- tibble::tribble(
+  ~pnr, ~date, ~included_hba1c,
+  "498718589800", "20230101", TRUE,
+  "498718589801", "20230101", TRUE,
+  "498718589803", "20210101", TRUE,
+  "498718589803", "20220101", TRUE,
+  "498718589804", "20230101", TRUE,
+  "498718589805", "20000101", TRUE,
+  "498718589806", "20000101", TRUE,
+  "498718589807", "20200101", TRUE
+)
+
+test_that("dataset needs expected variables", {
+  # Drop a required column so the input really is missing a variable. The
+  # unmodified `lab_forsker` is the valid input the other tests run
+  # successfully, so it cannot also be expected to raise an error.
+  actual <- lab_forsker[names(lab_forsker) != "samplingdate"]
+  expect_error(include_hba1c(actual))
+})
+
+test_that("those with inclusion are kept", {
+  # Running the inclusion step on the fixture should give exactly `expected`.
+  expect_equal(include_hba1c(lab_forsker), expected)
+})
+
+test_that("casing of input variables doesn't matter", {
+  # Upper-case every column name before running the inclusion step; the
+  # result should still match `expected`.
+  uppercased <- rename_with(lab_forsker, toupper)
+  actual <- include_hba1c(uppercased)
+  expect_equal(actual, expected)
+})
+
+test_that("verification works for DuckDB Database", {
+  # Optional backends: skip rather than fail when they are not installed.
+  skip_if_not_installed("arrow")
+  skip_if_not_installed("duckdb")
+  skip_if_not_installed("dbplyr")
+
+  # `arrow::to_duckdb()` gives a lazy table. `collect()` brings the result into
+  # memory so it can be compared with the `expected` tibble (it is a no-op if
+  # `include_hba1c()` already returns an in-memory data frame). A database
+  # query has no guaranteed row order, so sort the same way `expected` is
+  # sorted before comparing.
+  actual <- arrow::to_duckdb(lab_forsker) |>
+    include_hba1c() |>
+    dplyr::collect() |>
+    dplyr::arrange(pnr, date)
+
+  expect_equal(actual, expected)
+})
+
+test_that("verification works for Arrow Tables (from Parquet)", {
+  # Optional backend: skip rather than fail when it is not installed.
+  skip_if_not_installed("arrow")
+
+  # dplyr verbs on an Arrow Table are evaluated lazily. `collect()` brings the
+  # result into memory so it can be compared with the `expected` tibble (it is
+  # a no-op if `include_hba1c()` already returns an in-memory data frame).
+  # Sort the same way `expected` is sorted so the comparison does not depend
+  # on the row order the backend happens to return.
+  actual <- arrow::as_arrow_table(lab_forsker) |>
+    include_hba1c() |>
+    dplyr::collect() |>
+    dplyr::arrange(pnr, date)
+
+  expect_equal(actual, expected)
+})
+
+test_that("verification works for data.frame", {
+  # Same comparison as for the tibble fixture, starting from a base
+  # data.frame.
+  plain_df <- as.data.frame(lab_forsker)
+  actual <- include_hba1c(plain_df)
+
+  expect_equal(actual, expected)
+})
+
+test_that("verification works for data.table", {
+  # Optional backend: skip rather than fail when it is not installed.
+  skip_if_not_installed("data.table")
+
+  # Same comparison as for the tibble fixture, starting from a data.table.
+  # NOTE(review): `expected` is a tibble; confirm `include_hba1c()` returns a
+  # tibble here, otherwise the class difference will fail `expect_equal()`.
+  actual <- data.table::as.data.table(lab_forsker) |>
+    include_hba1c()
+
+  expect_equal(actual, expected)
+})
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		name,logic
		hba1c,(analysiscode == 'NPU27300' AND value >= 48) OR (analysiscode == 'NPU03835' AND value >= 6.5)
lwjohnst86 (Member, Author), May 16, 2024: This is how I was thinking the logic would be stored and written down. (signekb reacted with thumbs up and rocket.)

Aastedet (Collaborator), May 17, 2024, edited: Very clever! For HbA1c we (currently) convert NPU03835 (%) values to NPU27300 (mmol/mol) values in the pipeline, so we could potentially simplify the logic to `hba1c,(analysiscode %in% c('NPU27300', 'NPU03835') AND value >= 48)` or similar.

lwjohnst86 (Member, Author), Jun 14, 2024: I think it might be faster from a performance point of view to filter only and not do an additional calculation step beforehand.