#8 added functions and excel_ref data

TelethonKids · May 21, 2019 · 5f67531 · 5f67531
1 parent 5347fb8
commit 5f67531
Show file tree

Hide file tree

Showing 13 changed files with 194 additions and 54 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -40,4 +40,4 @@ Imports:
 Suggests:
     rmarkdown
 VignetteBuilder: knitr
-RoxygenNote: 6.1.0
+RoxygenNote: 6.1.1
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,9 +1,11 @@
 # Generated by roxygen2: do not edit by hand
 
+export(format_model)
 export(group_interval)
 export(meansd)
 export(nPer)
 export(roll_up)
+export(round_df)
 export(scale_color_telethonkids)
 export(scale_fill_telethonkids)
 export(telethonkids_colours)
@@ -16,7 +18,12 @@ import(dplyr)
 import(ggplot2)
 import(tibble)
 import(tidyr)
+importFrom(broom,tidy)
 importFrom(data.table,shift)
+importFrom(dplyr,left_join)
+importFrom(dplyr,mutate)
+importFrom(dplyr,mutate_at)
+importFrom(dplyr,select)
 importFrom(grDevices,colorRampPalette)
 importFrom(lubridate,as.duration)
 importFrom(lubridate,interval)

diff --git a/R/data.R b/R/data.R
@@ -1,11 +1,23 @@
+#' @title Excel column reference map
+#' @description A data table of alpha/numeric key/value pairs, where key is the
+#' letter sequence of the excel column. Range of letter sequence is A - ZZZ.
+#' Usage: excel_ref["ABC"]$numbers that return the column number of that letter
+#' sequence.
+#' @format A data table with 18278 rows and 2 columns:
+#' \describe{
+#'   \item{\code{letters}}{character Excel column reference letters}
+#'   \item{\code{numbers}}{integer corresponding column number}
+#' }
+"excel_ref"
+
 #' @title ICD-10 hash table
 #' @description A data.table that maps ICD-10 classification code to the disease
 #'   name, disease category, parent category and disease group. Codes were
 #'   sourced from GNU Health (downloaded June-2018).
 #'
 #' @format A data frame with 6820 rows and 5 variables:
 #' \describe{
-#'   \item{\code{id}}{(character key ICD-10 codes WITHOUT decimal points}
+#'   \item{\code{id}}{character key ICD-10 codes WITHOUT decimal points}
 #'   \item{\code{name}}{character disease name}
 #'   \item{\code{disease_category}}{character disease category}
 #'   \item{\code{parent_category}}{character parent disease category}

diff --git a/R/format_model.R b/R/format_model.R
@@ -0,0 +1,35 @@
+#' Format a model for quick in-text referencing
+#'
+#' This function builds on `broom::tidy()` by adding formatted strings ready for use in text.
+#'
+#' @param x a model object
+#' @param effects (default NULL) a tidy effects option e.g. "fixed", "random"
+#' @param conf.level (default 0.95) the reported confidence interval of model estimates
+#' @param dp (default 2) the number of decimal points reported in returned strings
+#'
+#' @return a tibble with default `tidy()` parameters plus `conf.low`, `conf.high`, `text1`, and `text2`
+#'
+#' @importFrom broom tidy
+#' @importFrom dplyr mutate select left_join
+#'
+#' @examples
+#' \dontrun{
+#' data(iris)
+#' m1 <- lm(Sepal.Length ~ Sepal.Width, data = iris)
+#'
+#' format_model(m1) %>% select(term, estimate, conf.low, conf.high)
+#'
+#' format_model(m1)$text1
+#' format_model(m1)$text2
+#' }
+#'
+#' @export
+format_model <- function(x, effects = NULL, conf.level = 0.95, dp = 2) {
+  a <- broom::tidy(x, effects = effects, conf.int = T, conf.level = conf.level)
+  b <- a %>%
+    round_df(dp) %>%
+    dplyr::mutate(text1 = paste0(estimate, " (", 100 * conf.level, "% CI: ", conf.low, " to ", conf.high, ")"),
+                  text2 = paste0("(", estimate, "; , ", 100 * conf.level, "% CI: ", conf.low, " to ", conf.high, ")")) %>%
+    dplyr::select(term, text1, text2)
+  dplyr::left_join(a, b, by = "term")
+}
diff --git a/R/round_df.R b/R/round_df.R
@@ -0,0 +1,25 @@
+#' Round data frame decimal points
+#'
+#' Round the numeric columns of a data frame to the specified number of decimal
+#' points using the `sprintf` function.
+#'
+#' @param df the data frame to be rounded
+#' @param dp (default 2) the desired number of decimal points
+#'
+#' @return a data frame, the rounded columns are convertd to character objects.
+#'
+#' @importFrom dplyr mutate_at
+#'
+#' @examples
+#' \dontrun{
+#' data(mtcars)
+#'
+#' head( round_df(mtcars, 1) )
+#' }
+#'
+#' @export
+round_df <- function(df, dp = 2) {
+  mutate_at(df,
+            names(df)[vapply(df, is.numeric, FUN.VALUE = logical(1), USE.NAMES = F)],
+            .funs = list(~sprintf(paste0("%0.", dp, "f"), .)))
+}
diff --git a/data/excel_ref.rda b/data/excel_ref.rda
diff --git a/inst/rmarkdown/templates/analysis_plan/skeleton/skeleton.Rmd b/inst/rmarkdown/templates/analysis_plan/skeleton/skeleton.Rmd
@@ -4,7 +4,7 @@ subtitle: "Biometrics Analysis Plan"
 author:
   - "Author&nbsp;1<sup>1</sup> and Author&nbsp;2<sup>2</sup>"
 affiliation:
-  - "<sup>1</sup>Telethon Kids Institute, Biometrics Group"
+  - "<sup>1</sup>Telethon Kids Institute, The University of Western Australia, Perth, Western Australia, Australia"
   - "<sup>2</sup>Other Affiliation(s)"
 date: "`r format(Sys.time(), '%d %B %Y')`"
 logo: assets/images/logo800.jpg
@@ -73,15 +73,15 @@ figures(name = "fig1", caption = "Caption.")
 #   In-text reference (e.g. Table 1): `r tables("tab1", display = "cite")`
 #   Caption number only (e.g. 1): `r tables("tab1", display = "num")`
 
-#### Models ----
-
-# Pre-run models here (if needed)
-
 #### Data wrangling ----
 
 # All data wrangling should be done in the munge directory, but any further data
 # manipulation can be done here.
 
+#### Models ----
+
+# Pre-run models here (if needed)
+
 ```
 
 # Overview

diff --git a/inst/rmarkdown/templates/analysis_report/skeleton/assets/bib/packages.bib b/inst/rmarkdown/templates/analysis_report/skeleton/assets/bib/packages.bib
@@ -9,9 +9,9 @@ @Manual{CiteR
 }
 @Manual{R-biometrics,
   title = {biometrics: Report Templates for the Telethon Kids Institute},
-  author = {Paul Stevenson and Matt Cooper},
+  author = {Paul Stevenson and Matthew Cooper},
   year = {2019},
-  note = {R package version 1.0.3},
+  note = {R package version 1.0.3.999},
 }
 @Manual{R-broom,
   title = {broom: Convert Statistical Analysis Objects into Tidy Tibbles},

diff --git a/inst/rmarkdown/templates/analysis_report/skeleton/skeleton.Rmd b/inst/rmarkdown/templates/analysis_report/skeleton/skeleton.Rmd
@@ -4,7 +4,7 @@ subtitle: "Biometrics Analysis Report"
 author:
   - "Author&nbsp;1<sup>1</sup> and Author&nbsp;2<sup>2</sup>"
 affiliation:
-  - "<sup>1</sup>Telethon Kids Institute, Biometrics Group"
+  - "<sup>1</sup>Telethon Kids Institute, The University of Western Australia, Perth, Western Australia, Australia"
   - "<sup>2</sup>Other Affiliation(s)"
 date: "`r format(Sys.time(), '%d %B %Y')`"
 logo: assets/images/logo800.jpg
@@ -56,35 +56,6 @@ source("assets/R/hooks.R")
 
 # source("../R/99-helper.R")
 
-mod_to_tab <- function(x, effects = NULL, tails = 0.975) {
-  x %>% 
-    tidy(effects = effects) %>% 
-    mutate(ci.lo = estimate - std.error * qnorm(tails),
-           ci.hi = estimate + std.error * qnorm(tails))
-}
-
-round_df <- function(df, digits) {
-  mutate_at(df,
-            names(df)[vapply(df, is.numeric, FUN.VALUE = logical(1), USE.NAMES = F)],
-            .funs = list(~sprintf(paste0("%0.", digits, "f"), .)))
-}
-
-library(data.table) # maybe just add as a data object to package
-excel_ref <- bind_rows(
-  tibble(letters = LETTERS),
-  expand.grid(LETTERS, LETTERS, stringsAsFactors = F) %>%
-    as_tibble() %>%
-    mutate(letters = paste0(Var2, Var1)) %>%
-    select(letters),
-  expand.grid(LETTERS, LETTERS, LETTERS, stringsAsFactors = F) %>%
-    as_tibble() %>%
-    mutate(letters = paste0(Var3, Var2, Var1)) %>%
-    select(letters)
-) %>%
-  mutate(numbers = 1:nrow(.)) %>%
-  as.data.table()
-setkey(excel_ref, numbers)
-
 #### Load data ----
 
 # load("../cache/dat.RData")
@@ -93,16 +64,25 @@ setkey(excel_ref, numbers)
 
 figures <- captioner(prefix = "Figure")
 tables <- captioner(prefix = "Table")
+models <- captioner(prefix = "Model")
 
 tables(name = "tab1", caption = "Caption")
+
 figures(name = "fig1", caption = "Caption.")
 figures(name = "fig2", caption = "Example of 'jtools::plot_summs()'.")
 
+models(name = "mod1", caption = "Caption.")
+
 # Cite in text with (replacing tab1 with the relevant table/figure name:
 #   Full caption: `r tables("tab1")`
 #   In-text reference (e.g. Table 1): `r tables("tab1", display = "cite")`
 #   Caption number only (e.g. 1): `r tables("tab1", display = "num")`
 
+#### Data wrangling ----
+
+# All data wrangling should be done in the munge directory, but any further data
+# manipulation can be done here.
+
 #### Models ----
 
 # Pre-run models here (if needed)
@@ -111,22 +91,20 @@ m1 <- lm(Sepal.Length ~ Sepal.Width, data = filter(iris, Species == "setosa"))
 m2 <- lm(Sepal.Length ~ Sepal.Width, data = filter(iris, Species == "versicolor"))
 m3 <- lm(Sepal.Length ~ Sepal.Width, data = filter(iris, Species == "virginica"))
 
-models <- bind_rows(
-  mod_to_tab(m1) %>% mutate(Species = "setosa"),
-  mod_to_tab(m2) %>% mutate(Species = "versicolor"),
-  mod_to_tab(m3) %>% mutate(Species = "virginica")
+models <- list(
+  m1 = format_model(m1),
+  m2 = format_model(m2),
+  m2 = format_model(m3)
 )
 
-#### Data wrangling ----
-
-# All data wrangling should be done in the munge directory, but any further data
-# manipulation can be done here.
+# print coefficients: `r select(models$m1, term, estimate, conf.low, conf.hi) %>% round_df(2)`
+# in-text citation: `r filter(models$m1, term == "Sepal.Width")$text1`
 
 ```
 
 # Overview
 
-XXX from the XXX Research Team is looking to engage in consultation with the Telethon Kids Institute's Biometrics team to undertake statistical analysis to determine ***brief project summary***.
+<mark>XXX</mark> from the <mark>XXX</mark> Research Team is looking to engage in consultation with the Telethon Kids Institute's Biometrics team to undertake statistical analysis to determine ***brief project summary***.
 
 ***Study overview to put the analysis in context.***
 
@@ -137,7 +115,7 @@ XXX from the XXX Research Team is looking to engage in consultation with the Tel
 
 # Summary of Findings
 
-<!-- Brief parragraph(s) that could be copy/pasted by the researcher into a manuscript, more detailed analysis is provided in the "commentary" section below -->
+<!-- Brief paragraph(s) that could be copy/pasted by the researcher into a manuscript, more detailed analysis is provided in the "commentary" section below -->
 
 # Tables
 
@@ -163,7 +141,7 @@ ggplot(iris, aes(Petal.Width, Petal.Length, color = Species)) +
 
 **`r figures("fig1")`**
 
-```{r q2_estimates_ci_fine, out.extra = "figure", echo = F, warning = F, message = F}
+```{r figure_2, out.extra = "figure", echo = F, warning = F, message = F}
 # TKI light palette (hex): c("#FED141", "#ED8B00", "#E0457B", "#C800A1", "#7566A0", "#6CACE4", "#00B2A9", "#78BE20", "#DBE442")
 # TKI dark palette (hex): c("#F1B434", "#CB6015", "#B52555", "#A12D86", "#4C4184", "#426DA9", "#008578", "#658D1B", "#BFB800")
 # TKI typography palette (hex): c("#B1B3B3", "#97999B", "#75787B", "#53565A")
@@ -190,9 +168,8 @@ plot_summs(m1, m2, m3,
 <!-- A list of models prepared during the analysis. -->
 
 ```{r model_1, table = T, results = "hide", eval = F}
-models %>%
-  filter(Species == "setosa") %>%
-  select(term, estimate, std.error, statistic, ci.lo, ci.hi) %>%
+models$m1 %>%
+  select(term, estimate, conf.low, conf.high) %>%
   round_df(2)
 
 ```

diff --git a/man/ICD10.Rd b/man/ICD10.Rd
diff --git a/man/excel_ref.Rd b/man/excel_ref.Rd
diff --git a/man/format_model.Rd b/man/format_model.Rd
diff --git a/man/round_df.Rd b/man/round_df.Rd