Closes #39 cleaned up script commenting, structure of scripts for con…

…sistency (#46) * # 39 cleaned up commenting and structure of scripts to make them more consistent * #39 chore: styler * Update data-raw/pc.R Co-authored-by: Kangjie Zhang <47867131+kaz462@users.noreply.github.com> * Update data-raw/pp.R Co-authored-by: Kangjie Zhang <47867131+kaz462@users.noreply.github.com> * Update data-raw/rs_onco.R Co-authored-by: Kangjie Zhang <47867131+kaz462@users.noreply.github.com> * Update data-raw/tr_onco.R Co-authored-by: Kangjie Zhang <47867131+kaz462@users.noreply.github.com> * Update data-raw/ts.R Co-authored-by: Kangjie Zhang <47867131+kaz462@users.noreply.github.com> * Update data-raw/tu_onco.R Co-authored-by: Kangjie Zhang <47867131+kaz462@users.noreply.github.com> * Update data-raw/ae.R Co-authored-by: Kangjie Zhang <47867131+kaz462@users.noreply.github.com> * Update data-raw/dm.R Co-authored-by: Kangjie Zhang <47867131+kaz462@users.noreply.github.com> * Update data-raw/ds.R Co-authored-by: Kangjie Zhang <47867131+kaz462@users.noreply.github.com> * Update data-raw/tr_onco.R Co-authored-by: Kangjie Zhang <47867131+kaz462@users.noreply.github.com> * #39 data rerun with new labels --------- Co-authored-by: Kangjie Zhang <47867131+kaz462@users.noreply.github.com>
pharmaverse · Aug 9, 2023 · 2f6bc1b · 2f6bc1b
1 parent e763ab7
commit 2f6bc1b
Show file tree

Hide file tree

Showing 47 changed files with 356 additions and 213 deletions.
diff --git a/data-raw/ae.R b/data-raw/ae.R
@@ -1,14 +1,22 @@
-# from CDISC pilot study ----
+# Datasets: ae, suppae
+# Description: Standard AE, SUPPAE datasets from CDISC pilot study
+
+# Load libraries -----
 library(dplyr)
 library(metatools)
 library(haven)
 library(admiral)
 
+# Create ae, suppae ----
 raw_ae <- read_xpt("https://github.com/cdisc-org/sdtm-adam-pilot-project/blob/master/updated-pilot-submission-package/900172/m5/datasets/cdiscpilot01/tabulations/sdtm/ae.xpt?raw=true") # nolint
 raw_suppae <- read_xpt("https://github.com/cdisc-org/sdtm-adam-pilot-project/blob/master/updated-pilot-submission-package/900172/m5/datasets/cdiscpilot01/tabulations/sdtm/suppae.xpt?raw=true") # nolint
 ae <- convert_blanks_to_na(raw_ae)
 suppae <- convert_blanks_to_na(raw_suppae)
 
-# Save dataset ----
-save(ae, file = "data/ae.rda", compress = "bzip2")
+# Label datasets ----
+attr(ae, "label") <- "Adverse Events"
+attr(suppae, "label") <- "Supplemental Qualifiers for AE"
+
+# Save datasets ----
+usethis::use_data(ae, overwrite = TRUE)
 usethis::use_data(suppae, overwrite = TRUE)
diff --git a/data-raw/ae_ophtha.R b/data-raw/ae_ophtha.R
@@ -1,18 +1,22 @@
-# Update AE by adding AELAT variable for admiralophtha package
+# Dataset: ae_ophtha
+# Description: Add ophtha-specific AELAT variable to existing AE dataset
+
+# Load libraries -----
 library(dplyr)
 library(admiral)
 library(metatools)
 library(haven)
 
+# Create ae_ophtha ----
 # Start from standard AE dataset from this package - this should be
 # in the environment already if devtools::load_all() has been run
 ae_ophtha <- ae
 
-# create possible AELAT values - as collected on CRF ----
+## Create possible AELAT values - as collected on CRF ----
 lat <- c("LEFT", "RIGHT", "BOTH")
 
-# create AELAT variable ----
-# with random assignment of lat values where AESOC is "EYE DISORDERS"
+## Create AELAT variable ----
+# Use random assignment of lat values where AESOC is "EYE DISORDERS"
 # Set seed so that result stays the same for each run
 set.seed(1)
 
@@ -24,7 +28,8 @@ ae_ophtha$AELAT <- if_else(ae_ophtha$AESOC == "EYE DISORDERS",
 ae_ophtha <- ae_ophtha %>%
   add_labels(AELAT = "Laterality")
 
+# Label dataset ----
 attr(ae_ophtha, "label") <- "Adverse Events"
 
 # Save dataset ----
-save(ae_ophtha, file = "data/ae_ophtha.rda", compress = "bzip2")
+usethis::use_data(ae_ophtha, overwrite = TRUE)
diff --git a/data-raw/cm.R b/data-raw/cm.R
@@ -1,7 +1,16 @@
-# from CDISC pilot study ----
+# Dataset: cm
+# Description: Standard CM dataset from CDISC pilot study
+
+# Load libraries -----
 library(haven)
 library(admiral)
+
+# Create cm ----
 raw_cm <- read_xpt("https://github.com/cdisc-org/sdtm-adam-pilot-project/blob/master/updated-pilot-submission-package/900172/m5/datasets/cdiscpilot01/tabulations/sdtm/cm.xpt?raw=true") # nolint
 cm <- convert_blanks_to_na(raw_cm)
 
+# Label dataset ----
+attr(cm, "label") <- "Concomitant Medications"
+
+# Save dataset ----
 usethis::use_data(cm, overwrite = TRUE)
diff --git a/data-raw/dm.R b/data-raw/dm.R
@@ -1,11 +1,21 @@
-# from CDISC pilot study ----
+# Datasets: dm, suppdm
+# Description: Standard DM, SUPPDM datasets from CDISC pilot study
+
+# Load libraries -----
 library(haven)
 library(admiral)
+
+# Create dm, suppdm ----
 sdtm_path <- "https://github.com/cdisc-org/sdtm-adam-pilot-project/blob/master/updated-pilot-submission-package/900172/m5/datasets/cdiscpilot01/tabulations/sdtm/" # nolint
 raw_dm <- read_xpt(paste0(sdtm_path, "dm", ".xpt?raw=true"))
 raw_suppdm <- read_xpt(paste0(sdtm_path, "suppdm", ".xpt?raw=true"))
 dm <- convert_blanks_to_na(raw_dm)
 suppdm <- convert_blanks_to_na(raw_suppdm)
 
+# Label datasets ----
+attr(dm, "label") <- "Demographics"
+attr(suppdm, "label") <- "Supplemental Qualifiers for DM"
+
+# Save datasets ----
 usethis::use_data(dm, overwrite = TRUE)
 usethis::use_data(suppdm, overwrite = TRUE)
diff --git a/data-raw/ds.R b/data-raw/ds.R
@@ -1,29 +1,34 @@
-# Update DS by adding DSDECOD=RANDOMIZED rows
+# Datasets: ds, suppds
+# Description: Standard DS, SUPPDS datasets from CDISC pilot study with added DSDECOD = RANDOMIZED rows
 
+# Load libraries -----
 library(dplyr)
 library(tidyselect)
 library(labelled)
 library(admiral)
 library(metatools)
 library(haven)
 
+# Create ds ----
 data("dm")
 sdtm_path <- "https://github.com/cdisc-org/sdtm-adam-pilot-project/blob/master/updated-pilot-submission-package/900172/m5/datasets/cdiscpilot01/tabulations/sdtm/" # nolint
 raw_ds <- read_xpt(paste0(sdtm_path, "ds", ".xpt?raw=true"))
 raw_suppds <- read_xpt(paste0(sdtm_path, "suppds", ".xpt?raw=true"))
 
-# Converting blank to NA
+## Converting blank to NA ----
 dm <- convert_blanks_to_na(dm)
 ds1a <- convert_blanks_to_na(raw_ds)
 suppds1a <- convert_blanks_to_na(raw_suppds)
 
-# Creating full DS data
+## Creating full DS data ----
 ds1a <- ds1a %>%
   mutate(DSSEQ = as.character(DSSEQ))
 ds1 <- combine_supp(ds1a, suppds1a) %>%
   mutate(DSSEQ = as.numeric(DSSEQ))
 
-# Creating RANDOMIZED records
+dsnames <- names(ds1a)
+
+## Creating RANDOMIZED records ----
 dm1 <- select(dm, c(STUDYID, USUBJID, RFSTDTC)) %>%
   filter(!is.na(RFSTDTC)) %>%
   mutate(
@@ -41,7 +46,7 @@ dm1 <- select(dm, c(STUDYID, USUBJID, RFSTDTC)) %>%
 
 ds2 <- bind_rows(ds1, select(dm1, -c(RFSTDTC)))
 
-# Adding labels
+## Adding labels ----
 dslab <- var_label(ds1a)
 var_label(ds2) <- dslab
 
@@ -56,7 +61,15 @@ ds3 <- ds2 %>%
   ) %>%
   ungroup()
 
-# Creating SUPPDS
+ds4 <- ds3 %>%
+  select(all_of(dsnames))
+
+## Label dataset ----
+attr(ds4, "label") <- "Disposition"
+
+ds <- ds4
+
+# Create suppds ----
 suppds1 <- select(ds3, c("STUDYID", "USUBJID", "DSSEQ", "DOMAIN", "ENTCRIT")) %>%
   filter(!is.na(ENTCRIT))
 
@@ -68,38 +81,28 @@ suppds2 <- rename(suppds1, "RDOMAIN" = "DOMAIN") %>%
     "QNAM" = "ENTCRIT",
     "QLABEL" = "PROTOCOL ENTRY CRITERIA NOT MET",
     "QORIG" = "CRF"
-  )
+  ) %>%
+  select(STUDYID, RDOMAIN, USUBJID, IDVAR, IDVARVAL, QNAM, QLABEL, QVAL, QORIG)
 
-suppds <- select(
-  suppds2,
-  c(
-    STUDYID, RDOMAIN, USUBJID,
-    IDVAR, IDVARVAL, QNAM, QLABEL,
-    QVAL, QORIG
+## Adding labels ----
+suppds3 <- suppds2 %>%
+  add_labels(
+    STUDYID = "Study Identifier",
+    RDOMAIN = "Related Domain Abbreviation",
+    USUBJID = "Unique Subject Identifier",
+    IDVAR = "Identifying Variable",
+    IDVARVAL = "Identifying Variable Value",
+    QNAM = "Qualifier Variable Name",
+    QLABEL = "Qualifier Variable Label",
+    QVAL = "Data Value",
+    QORIG = "Origin"
   )
-)
-
-admiral_suppds <- suppds %>% add_labels(
-  STUDYID = "Study Identifier",
-  RDOMAIN = "Related Domain Abbreviation",
-  USUBJID = "Unique Subject Identifier",
-  IDVAR = "Identifying Variable",
-  IDVARVAL = "Identifying Variable Value",
-  QNAM = "Qualifier Variable Name",
-  QLABEL = "Qualifier Variable Label",
-  QVAL = "Data Value",
-  QORIG = "Origin"
-)
-
-attr(admiral_suppds, "label") <- "Supplemental Disposition"
-
-# Creating DS
-dsnames <- names(ds1a)
-admiral_ds <- select(ds3, all_of(dsnames))
 
-attr(admiral_ds, "label") <- "Disposition"
+## Label dataset ----
+attr(suppds3, "label") <- "Supplemental Qualifiers for DS"
+
+suppds <- suppds3
 
-ds <- admiral_ds
-suppds <- admiral_suppds
-save(ds, file = "data/ds.rda", compress = "bzip2")
-save(suppds, file = "data/suppds.rda", compress = "bzip2")
+# Save datasets ----
+usethis::use_data(ds, overwrite = TRUE)
+usethis::use_data(suppds, overwrite = TRUE)
diff --git a/data-raw/ex.R b/data-raw/ex.R
@@ -1,7 +1,16 @@
-# from CDISC pilot study ----
+# Dataset: ex
+# Description: Standard EX dataset from CDISC pilot study
+
+# Load libraries -----
 library(haven)
 library(admiral)
+
+# Create ex ----
 raw_ex <- read_xpt("https://github.com/cdisc-org/sdtm-adam-pilot-project/blob/master/updated-pilot-submission-package/900172/m5/datasets/cdiscpilot01/tabulations/sdtm/ex.xpt?raw=true") # nolint
 ex <- convert_blanks_to_na(raw_ex)
 
+# Label dataset ----
+attr(ex, "label") <- "Exposure"
+
+# Save dataset ----
 usethis::use_data(ex, overwrite = TRUE)
diff --git a/data-raw/ex_ophtha.R b/data-raw/ex_ophtha.R
@@ -1,14 +1,20 @@
+# Dataset: ex_ophtha
+# Description: Add ophtha-specific EXLOC, EXLAT variables to existing EX dataset
+# and modify EXROUTE, EXDOSFRM, EXDOSFRQ to something eye-related
+
+# Load libraries -----
 library(dplyr)
 library(tidyselect)
+
+# Create ex_ophtha ----
 data("dm")
 data("ex")
 
-# Make ex_ophtha dataset
 ex_ophtha <- dm %>%
-  # Start by merging on ophtha_dm to use the SUBJID variable
+  ## Merge with dm to use the SUBJID variable ----
   select(USUBJID, SUBJID) %>%
   right_join(ex, by = c("USUBJID"), multiple = "all") %>%
-  # Create EXLOC & EXLAT, change EXROUTE & EXDOSFRM to something eye-related
+  ## Create EXLOC & EXLAT, change EXROUTE & EXDOSFRM ----
   mutate(
     EXLOC = "EYE",
     EXDOSFRM = "INJECTION",
@@ -24,12 +30,15 @@ ex_ophtha <- dm %>%
     "EXENDTC", "EXSTDY", "EXENDY"
   )
 
-# Label new variables
+## Label new variables ----
 attr(ex_ophtha$EXLOC, "label") <- "Location of Dose Administration"
 attr(ex_ophtha$EXLAT, "label") <- "Laterality"
 attr(ex_ophtha$EXROUTE, "label") <- "Route of Administration"
 attr(ex_ophtha$EXDOSFRM, "label") <- "Dose Form"
 attr(ex_ophtha$EXDOSFRQ, "label") <- "Dose Frequency per Interval"
 
-# Save Dataset
-save(ex_ophtha, file = file.path("data", "ex_ophtha.rda"), compress = "bzip2")
+# Label dataset ----
+attr(ex_ophtha, "label") <- "Exposure"
+
+# Save dataset ----
+usethis::use_data(ex_ophtha, overwrite = TRUE)
diff --git a/data-raw/lb.R b/data-raw/lb.R
@@ -1,19 +1,21 @@
-# Update LB by adding percentage differential lab test rows
+# Dataset: lb
+# Description: Standard LB dataset from CDISC pilot study with added percentage differential lab test rows
 
+# Load libraries -----
 library(dplyr)
 library(haven)
 library(admiral)
 
+# Create lb ----
 sdtm_path <- "https://github.com/cdisc-org/sdtm-adam-pilot-project/blob/master/updated-pilot-submission-package/900172/m5/datasets/cdiscpilot01/tabulations/sdtm/" # nolint
 raw_lb <- read_xpt(paste0(sdtm_path, "lb", ".xpt?raw=true"))
-lb <- raw_lb %>% convert_blanks_to_na()
+lb_orig <- raw_lb %>% convert_blanks_to_na()
 
-
-# Subset on differential lab tests
-lb_diff_abs <- lb %>%
+## Subset on differential lab tests ----
+lb_diff_abs <- lb_orig %>%
   filter(LBTESTCD %in% c("BASO", "EOS", "LYM", "MONO", "NEUT"))
 
-# Subset on a few patients and visits
+## Subset on a few patients and visits ----
 subject_sub <- lb_diff_abs %>%
   distinct(USUBJID) %>%
   head()
@@ -24,7 +26,7 @@ lb_sub <- lb_diff_abs %>%
     VISIT %in% c("SCREENING 1", "WEEK 2")
   )
 
-# Create dummy differential lab tests
+## Create dummy differential lab tests ----
 set.seed(1)
 # One random draw per row of lb_sub from 0, 0.05, ..., 0.5, with replacement.
 # Spell out TRUE: T is an ordinary binding that can be reassigned.
 rand_diff <- sample(
   seq(0, 0.5, by = 0.05),
   size = nrow(lb_sub),
   replace = TRUE
 )
 
@@ -73,12 +75,13 @@ lb_diff <- lb_sub %>%
   )
 
 
-# Replace original rows with new records
-admiral_lb <- lb %>%
+## Replace original rows with new records ----
+lb <- lb_orig %>%
   anti_join(lb_diff, by = c("USUBJID", "VISIT", "LBSEQ")) %>%
   rbind(lb_diff)
 
-attr(admiral_lb, "label") <- "Laboratory Test Results"
+# Label dataset ----
+attr(lb, "label") <- "Laboratory Test Results"
 
-lb <- admiral_lb
-save(lb, file = "data/lb.rda", compress = "bzip2")
+# Save dataset ----
+usethis::use_data(lb, overwrite = TRUE)
diff --git a/data-raw/mh.R b/data-raw/mh.R
@@ -1,24 +1,31 @@
-# Add new variables to MH
+# Dataset: mh
+# Description: Standard MH dataset from CDISC pilot study with added variables MHENDTC, MHPRESP, MHOCCUR etc
+
+# Load libraries -----
 library(metatools)
 library(lubridate)
 library(haven)
 library(admiral)
 library(dplyr)
 
-data("dm")
+# Create mh ----
 sdtm_path <- "https://github.com/cdisc-org/sdtm-adam-pilot-project/blob/master/updated-pilot-submission-package/900172/m5/datasets/cdiscpilot01/tabulations/sdtm/" # nolint
 raw_mh <- read_xpt(paste0(sdtm_path, "mh", ".xpt?raw=true"))
 
-# Convert blank to NA
+## Get dm ----
+data("dm")
+
+## Convert blank to NA ----
 dm <- convert_blanks_to_na(dm) %>%
   select(STUDYID, USUBJID, RFSTDTC, RFENDTC, RFXSTDTC, RFXENDTC)
-mh <- convert_blanks_to_na(raw_mh)
-# Set seed so that result stays the same for each run
+mh_orig <- convert_blanks_to_na(raw_mh)
+
+## Set seed so that result stays the same for each run ----
 set.seed(1)
 ran_int <- sample.int(400, nrow(raw_mh), replace = TRUE)
 
-
-admiral_mh <- mh %>%
+## Add new variables ----
+mh1 <- mh_orig %>%
   # Add MHENDTC
   mutate(MHENDTC = as.character(as.Date(MHSTDTC) + days(ran_int))) %>%
   # Add MHPRESP
@@ -72,6 +79,9 @@ admiral_mh <- mh %>%
     MHSTAT = "Completion Status"
   )
 
-attr(admiral_mh, "label") <- "Medical History"
-mh <- admiral_mh
-save(mh, file = "data/mh.rda", compress = "bzip2")
+# Label dataset ----
+attr(mh1, "label") <- "Medical History"
+mh <- mh1
+
+# Save dataset ----
+usethis::use_data(mh, overwrite = TRUE)