Deploying to gh-pages from @ 4b38d16 🚀

OuhscBbmc · Oct 22, 2023 · 3343d68 · 3343d68
1 parent f3957fe
commit 3343d68
Show file tree

Hide file tree

Showing 6 changed files with 320 additions and 20 deletions.
diff --git a/ch-snippets.Rmd b/ch-snippets.Rmd
@@ -110,6 +110,106 @@ ds <- vroom::vroom(
 rm(col_types)
 ```
 
+
+Row Operations {#snippets-row}
+------------------------------------
+
+We frequently have to find the mean or sum across columns (within a row).
+
+**Finding the mean across a lot of columns**
+
+Here are several approaches for finding the mean across columns, without naming each column.  Some remarks:
+
+* `m1` & `m2` are sanity checks for this example.  
+  `m1` would be clumsy if you have 10+ items.  
+  `m2` is discouraged because it's brittle.  
+  A change in the column order could alter the calculation.
+  We prefer to use `grep()` to specify a sequence of items.
+* Especially for large datasets, 
+  I’d lean towards `m3` if the items are reasonably complete and 
+  `m4` if some participants are missing enough items that their summary score is fishy.
+  In the approaches below, `m4` and `m6` return the mean only if the participant completed 2 or more items.
+* `dplyr::rowwise()` is convenient, but slow for large datasets.
+* If you need a more complex function that’s too clumsy to include directly in a `mutate()` statement, 
+  see how the calculation for `m6` is delegated to the external function, `f6`.
+* The technique behind `nonmissing` is pretty cool, 
+  because you can apply an arbitrary function to each cell before they’re summed/averaged.  
+  This is in contrast to `f6()`, which applies to an entire (row-wise) data.frame.
+
+```r
+# Names of the columns to average within each row.  For a long run of items, consider building this vector with `grep()` on `colnames()`.
+columns_to_average <- c("hp", "drat", "wt")
+
+f6 <- function(x) {
+  # Mean of the nonmissing cells in `x`; `NA_real_` when fewer than 2 are nonmissing.  (Debug a single call with `browser()` here.)
+  s <- sum(x, na.rm = TRUE) # Sum of the nonmissing cells.
+  n <- sum(!is.na(x))       # Count of the nonmissing cells.
+
+  dplyr::if_else(
+    2L <= n,  # Require at least two nonmissing cells,
+    s / n,    # then return their mean;
+    NA_real_  # otherwise the result is missing.
+  )
+}
+
+mtcars |> # Compute the row-wise mean of hp, drat, & wt several ways (m1-m6).
+  dplyr::mutate(
+    m1 = (hp + drat + wt) / 3, # Sanity check: every column is named explicitly.
+    m2 = # Sanity check: brittle, because `hp:wt` depends on the column order.
+      rowMeans(
+        dplyr::across(hp:wt), # All columns between hp & wt.
+        na.rm = TRUE
+      ),
+    m3 = # Mean of the nonmissing cells among `columns_to_average`.
+      rowMeans(
+        dplyr::across(!!columns_to_average),
+        na.rm = TRUE
+      ),
+    s4 = # Sum of the nonmissing cells (the numerator of m4).
+      rowSums(
+        dplyr::across(!!columns_to_average),
+        na.rm = TRUE
+      ),
+    nonmissing = # Count of nonmissing cells per row (the denominator of m4).
+      rowSums(
+        dplyr::across(
+          !!columns_to_average,
+          .fns = \(x) { !is.na(x) } # Each cell becomes TRUE/FALSE before the row is summed.
+        )
+      ),
+    m4 = # Mean only when at least 2 cells are nonmissing; otherwise NA.
+      dplyr::if_else(
+        2 <= nonmissing,
+        s4 / nonmissing,
+        NA_real_
+      )
+  ) |>
+  dplyr::rowwise() |> # Required for `m5`
+  dplyr::mutate(
+    m5 = mean(dplyr::c_across(dplyr::all_of(columns_to_average))), # No `na.rm`, so any missing cell makes m5 NA.
+  ) |>
+  dplyr::ungroup() |> # Clean up after rowwise()
+  dplyr::rowwise() |> # Required for `m6`
+  dplyr::mutate(
+    m6 = f6(dplyr::across(!!columns_to_average)) # Delegate each row's calculation to `f6()`.
+  ) |>
+  dplyr::ungroup() |>   # Clean up after rowwise()
+  dplyr::select( # Keep only the input columns and the new summary columns.
+    hp,
+    drat,
+    wt,
+    m1,
+    m2, 
+    m3, 
+    s4,
+    nonmissing,
+    m4,
+    m5, 
+    m6,
+  )
+```
+
+
 Grooming {#snippets-grooming}
 ------------------------------------
 

diff --git a/docs/ch-snippets.md b/docs/ch-snippets.md
@@ -110,6 +110,106 @@ ds <- vroom::vroom(
 rm(col_types)
 ```
 
+
+Row Operations {#snippets-row}
+------------------------------------
+
+We frequently have to find the mean or sum across columns (within a row).
+
+**Finding the mean across a lot of columns**
+
+Here are several approaches for finding the mean across columns, without naming each column.  Some remarks:
+
+* `m1` & `m2` are sanity checks for this example.  
+  `m1` would be clumsy if you have 10+ items.  
+  `m2` is discouraged because it's brittle.  
+  A change in the column order could alter the calculation.
+  We prefer to use `grep()` to specify a sequence of items.
+* Especially for large datasets, 
+  I’d lean towards `m3` if the items are reasonably complete and 
+  `m4` if some participants are missing enough items that their summary score is fishy.
+  In the approaches below, `m4` and `m6` return the mean only if the participant completed 2 or more items.
+* `dplyr::rowwise()` is convenient, but slow for large datasets.
+* If you need a more complex function that’s too clumsy to include directly in a `mutate()` statement, 
+  see how the calculation for `m6` is delegated to the external function, `f6`.
+* The technique behind `nonmissing` is pretty cool, 
+  because you can apply an arbitrary function to each cell before they’re summed/averaged.  
+  This is in contrast to `f6()`, which applies to an entire (row-wise) data.frame.
+
+```r
+# Names of the columns to average within each row.  For a long run of items, consider building this vector with `grep()` on `colnames()`.
+columns_to_average <- c("hp", "drat", "wt")
+
+f6 <- function(x) {
+  # Mean of the nonmissing cells in `x`; `NA_real_` when fewer than 2 are nonmissing.  (Debug a single call with `browser()` here.)
+  s <- sum(x, na.rm = TRUE) # Sum of the nonmissing cells.
+  n <- sum(!is.na(x))       # Count of the nonmissing cells.
+
+  dplyr::if_else(
+    2L <= n,  # Require at least two nonmissing cells,
+    s / n,    # then return their mean;
+    NA_real_  # otherwise the result is missing.
+  )
+}
+
+mtcars |> # Compute the row-wise mean of hp, drat, & wt several ways (m1-m6).
+  dplyr::mutate(
+    m1 = (hp + drat + wt) / 3, # Sanity check: every column is named explicitly.
+    m2 = # Sanity check: brittle, because `hp:wt` depends on the column order.
+      rowMeans(
+        dplyr::across(hp:wt), # All columns between hp & wt.
+        na.rm = TRUE
+      ),
+    m3 = # Mean of the nonmissing cells among `columns_to_average`.
+      rowMeans(
+        dplyr::across(!!columns_to_average),
+        na.rm = TRUE
+      ),
+    s4 = # Sum of the nonmissing cells (the numerator of m4).
+      rowSums(
+        dplyr::across(!!columns_to_average),
+        na.rm = TRUE
+      ),
+    nonmissing = # Count of nonmissing cells per row (the denominator of m4).
+      rowSums(
+        dplyr::across(
+          !!columns_to_average,
+          .fns = \(x) { !is.na(x) } # Each cell becomes TRUE/FALSE before the row is summed.
+        )
+      ),
+    m4 = # Mean only when at least 2 cells are nonmissing; otherwise NA.
+      dplyr::if_else(
+        2 <= nonmissing,
+        s4 / nonmissing,
+        NA_real_
+      )
+  ) |>
+  dplyr::rowwise() |> # Required for `m5`
+  dplyr::mutate(
+    m5 = mean(dplyr::c_across(dplyr::all_of(columns_to_average))), # No `na.rm`, so any missing cell makes m5 NA.
+  ) |>
+  dplyr::ungroup() |> # Clean up after rowwise()
+  dplyr::rowwise() |> # Required for `m6`
+  dplyr::mutate(
+    m6 = f6(dplyr::across(!!columns_to_average)) # Delegate each row's calculation to `f6()`.
+  ) |>
+  dplyr::ungroup() |>   # Clean up after rowwise()
+  dplyr::select( # Keep only the input columns and the new summary columns.
+    hp,
+    drat,
+    wt,
+    m1,
+    m2, 
+    m3, 
+    s4,
+    nonmissing,
+    m4,
+    m5, 
+    m6,
+  )
+```
+
+
 Grooming {#snippets-grooming}
 ------------------------------------
 

diff --git a/docs/example-chapter.html b/docs/example-chapter.html
@@ -113,7 +113,7 @@ <h1>
 <p><em>This intro was copied from the 1st chapter of the example bookdown repo. I’m keeping it temporarily for reference.</em></p>
 <p>You can label chapter and section titles using <code>{#label}</code> after them, e.g., we can reference the <a href="index.html#intro">Intro</a> Chapter. If you do not manually label them, there will be automatic labels anyway</p>
 <p>Figures and tables with captions will be placed in <code>figure</code> and <code>table</code> environments, respectively.</p>
-<div class="sourceCode" id="cb101"><pre class="downlit sourceCode r">
+<div class="sourceCode" id="cb102"><pre class="downlit sourceCode r">
 <code class="sourceCode R"><span><span class="fu"><a href="https://rdrr.io/r/graphics/par.html">par</a></span><span class="op">(</span>mar <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html">c</a></span><span class="op">(</span><span class="fl">4</span>, <span class="fl">4</span>, <span class="fl">.1</span>, <span class="fl">.1</span><span class="op">)</span><span class="op">)</span></span>
 <span><span class="fu"><a href="https://rdrr.io/r/graphics/plot.default.html">plot</a></span><span class="op">(</span><span class="va">pressure</span>, type <span class="op">=</span> <span class="st">'b'</span>, pch <span class="op">=</span> <span class="fl">19</span><span class="op">)</span></span></code></pre></div>
 <div class="figure" style="text-align: center">
@@ -123,7 +123,7 @@ <h1>
 </p>
 </div>
 <p>Reference a figure by its code chunk label with the <code>fig:</code> prefix, e.g., see Figure <a href="example-chapter.html#fig:nice-fig">G.1</a>. Similarly, you can reference tables generated from <code><a href="https://rdrr.io/pkg/knitr/man/kable.html">knitr::kable()</a></code>, e.g., see Table <a href="example-chapter.html#tab:nice-tab">G.1</a>.</p>
-<div class="sourceCode" id="cb102"><pre class="downlit sourceCode r">
+<div class="sourceCode" id="cb103"><pre class="downlit sourceCode r">
 <code class="sourceCode R"><span><span class="fu">knitr</span><span class="fu">::</span><span class="fu"><a href="https://rdrr.io/pkg/knitr/man/kable.html">kable</a></span><span class="op">(</span></span>
 <span>  <span class="fu"><a href="https://rdrr.io/r/utils/head.html">head</a></span><span class="op">(</span><span class="va">iris</span>, <span class="fl">20</span><span class="op">)</span>, caption <span class="op">=</span> <span class="st">'Here is a nice table!'</span>,</span>
 <span>  booktabs <span class="op">=</span> <span class="cn">TRUE</span></span>

diff --git a/docs/reference-keys.txt b/docs/reference-keys.txt
@@ -275,6 +275,7 @@ snippets-reading
 snippets-reading-excel
 snippets-reading-trailing-comma
 snippets-reading-vroom
+snippets-row
 snippets-grooming
 snippets-grooming-two-year
 snippets-identification

diff --git a/docs/search.json b/docs/search.json