From db06da95de974525f8627be07ec9bbd9a730e947 Mon Sep 17 00:00:00 2001 From: Melissa Date: Thu, 11 Jul 2024 21:52:43 +0000 Subject: [PATCH] copying to student repo --- materials/tutorial_06/tests_tutorial_06.R | 665 ++++ materials/tutorial_06/tutorial_06.ipynb | 2848 ++++++++++++++++++ materials/worksheet_06/data/hcv-data-set.csv | 612 ++++ materials/worksheet_06/tests_worksheet_06.R | 871 ++++++ materials/worksheet_06/worksheet_06.ipynb | 2717 +++++++++++++++++ 5 files changed, 7713 insertions(+) create mode 100644 materials/tutorial_06/tests_tutorial_06.R create mode 100644 materials/tutorial_06/tutorial_06.ipynb create mode 100644 materials/worksheet_06/data/hcv-data-set.csv create mode 100644 materials/worksheet_06/tests_worksheet_06.R create mode 100644 materials/worksheet_06/worksheet_06.ipynb diff --git a/materials/tutorial_06/tests_tutorial_06.R b/materials/tutorial_06/tests_tutorial_06.R new file mode 100644 index 0000000..a915792 --- /dev/null +++ b/materials/tutorial_06/tests_tutorial_06.R @@ -0,0 +1,665 @@ +# --- +# jupyter: +# jupytext: +# formats: r:light +# text_representation: +# extension: .r +# format_name: light +# format_version: '1.5' +# jupytext_version: 1.5.2 +# kernelspec: +# display_name: R +# language: R +# name: ir +# --- + +library(digest) +library(testthat) + +# + +# Question 1.6 + +test_1.6 <- function() { + test_that('Did not assign answer to an object called "max_flow_result_plot"', { + expect_true(exists("max_flow_result_plot")) + }) + + test_that("Solution should be a ggplot object", { + expect_true(is.ggplot(max_flow_result_plot)) + }) + + properties <- c(max_flow_result_plot$layers[[1]]$mapping, max_flow_result_plot$mapping) + + test_that("Plot should have stat on the x-axis", { + expect_true("stat" == rlang::get_expr(properties$x)) + }) + + test_that("Plot does not have the correct layers", { + expect_true("GeomBar" %in% class(max_flow_result_plot$layers[[1]]$geom)) + + # Remove if not needed: + expect_true("GeomSegment" %in% 
class(max_flow_result_plot$layers[[3]]$geom)) + # expect_true("GeomVline" %in% class(steam_ci_plot$layers[[2]]$geom)) + }) + + test_that("Plot does not have the correct bin width", { + expect_equal( + digest(as.integer(mget("stat_params", max_flow_result_plot$layers[[1]])[["stat_params"]][["bins"]])), + "71db8a6cad03244e6e50f0ad8bc95a65" + ) + }) + + test_that("Plot does not use the correct data", { + expect_equal(digest(nrow(max_flow_result_plot$data)), "b6a6227038bf9be67533a45a6511cc7e") + expect_equal(digest(round(sum(max_flow_result_plot$data$stat))), "9576b6777b306f3bf8649d96d80029dc") + + # If stat is not known: + # expect_equal(digest(round(sum(pull(max_flow_result_plot$data, rlang::get_expr(properties$x))))), "HASH_HERE") + }) + + test_that("x-axis label should be descriptive and human readable", { + expect_false(max_flow_result_plot$labels$x == toString(rlang::get_expr(properties$x))) + }) + + test_that("Plot should have a title", { + expect_true("title" %in% names(max_flow_result_plot$labels)) + }) + + print("Success!") +} + + +# + +# Question 1.7 + +test_1.7 <- function() { + test_that('Did not assign answer to an object called "answer1.7"', { + expect_true(exists("answer1.7")) + }) + + test_that("Solution should be a data frame", { + expect_true("data.frame" %in% class(answer1.7)) + }) + + expected_colnames <- c("p_value") + given_colnames <- colnames(answer1.7) + test_that("Data frame does not have the correct columns", { + expect_equal(length(setdiff( + union(expected_colnames, given_colnames), + intersect(expected_colnames, given_colnames) + )), 0) + }) + + test_that("Data frame does not contain the correct number of rows", { + expect_equal(digest(as.integer(nrow(answer1.7))), "4b5630ee914e848e8d07221556b0a2fb") + }) + + test_that("Data frame does not contain the correct data", { + expect_equal(digest(as.integer(sum(answer1.7$p_value) * 10e6)), "8b9f16c5c4263ff92734e5885038181d") + }) + + print("Success!") +} + +# + +# Question 1.9 + +test_1.9 <- 
function() { + test_that('Did not assign answer to an object called "answer1.9"', { + expect_true(exists("answer1.9")) + }) + + test_that('Solution should be a single character ("A", "B", "C", or "D")', { + expect_match(answer1.9, "a|b|c|d", ignore.case = TRUE) + }) + + answer_hash <- digest(tolower(answer1.9)) + test_that("Solution is incorrect", { + expect_equal(answer_hash, "d110f00cfb1b248e835137025804a23b") + }) + + print("Success!") +} + +# + +# Question 1.10 + +test_1.10 <- function() { + test_that('Did not assign answer to an object called "mean_max_bootstrap_dist"', { + expect_true(exists("mean_max_bootstrap_dist")) + }) + + test_that("Solution should be a data frame", { + expect_true("data.frame" %in% class(mean_max_bootstrap_dist)) + }) + + expected_colnames <- c("replicate", "stat") + given_colnames <- colnames(mean_max_bootstrap_dist) + test_that("Data frame does not have the correct columns", { + expect_equal(length(setdiff( + union(expected_colnames, given_colnames), + intersect(expected_colnames, given_colnames) + )), 0) + }) + + test_that("Data frame does not contain the correct number of rows", { + expect_equal(digest(as.integer(nrow(mean_max_bootstrap_dist))), "b6a6227038bf9be67533a45a6511cc7e") + }) + + test_that("Data frame does not contain the correct data", { + expect_equal(digest(as.integer(sum(mean_max_bootstrap_dist$stat))), "859a89ee67987480a3e83dcbff13ef15") + }) + + print("Success!") +} + +# + +# Question 1.11 + +test_1.11 <- function() { + test_that('Did not assign answer to an object called "mean_max_flow_ci"', { + expect_true(exists("mean_max_flow_ci")) + }) + + test_that("Solution should be a data frame", { + expect_true("data.frame" %in% class(mean_max_flow_ci)) + }) + + expected_colnames <- c("lower_ci", "upper_ci") + given_colnames <- colnames(mean_max_flow_ci) + test_that("Data frame does not have the correct columns", { + expect_equal(length(setdiff( + union(expected_colnames, given_colnames), + intersect(expected_colnames, 
given_colnames) + )), 0) + }) + + test_that("Data frame does not contain the correct number of rows", { + expect_equal(digest(as.integer(nrow(mean_max_flow_ci))), "4b5630ee914e848e8d07221556b0a2fb") + }) + + test_that("Data frame does not contain the correct data", { + expect_equal(digest(as.integer(sum(mean_max_flow_ci$lower_ci) * 10e6)), "a4032cdef8c2b688bb01ff5b0ddddb7d") + expect_equal(digest(as.integer(sum(mean_max_flow_ci$upper_ci) * 10e4)), "c670fbe8dc5444d2e457cbf4f4991c1e") + }) + + print("Success!") +} + +# + +# Question 1.12 + +test_1.12 <- function() { + test_that('Did not assign answer to an object called "mean_flow_ci_plot"', { + expect_true(exists("mean_flow_ci_plot")) + }) + + test_that("Solution should be a ggplot object", { + expect_true(is.ggplot(mean_flow_ci_plot)) + }) + + properties <- c(mean_flow_ci_plot$layers[[1]]$mapping, mean_flow_ci_plot$mapping) + + test_that("Plot should have stat on the x-axis", { + expect_true("stat" == rlang::get_expr(properties$x)) + }) + + test_that("Plot does not have the correct layers", { + expect_true("GeomBar" %in% class(mean_flow_ci_plot$layers[[1]]$geom)) + + # Remove if not needed: + expect_true("GeomRect" %in% class(mean_flow_ci_plot$layers[[2]]$geom)) + # expect_true("GeomVline" %in% class(mean_flow_ci_plot$layers[[2]]$geom)) + }) + + test_that("Plot does not have the correct bin width", { + expect_equal( + digest(as.integer(mget("stat_params", mean_flow_ci_plot$layers[[1]])[["stat_params"]][["binwidth"]])), + "3e2e4a08c44d0224de5b7e668c75ace3" + ) + }) + + test_that("Plot does not use the correct data", { + expect_equal(digest(nrow(mean_flow_ci_plot$data)), "b6a6227038bf9be67533a45a6511cc7e") + expect_equal(digest(round(sum(mean_flow_ci_plot$data$stat))), "5cb36e46ad96537202f5b6df950ca6da") + + # If stat is not known: + # expect_equal(digest(round(sum(pull(mean_flow_ci_plot$data, rlang::get_expr(properties$x))))), "HASH_HERE") + }) + + test_that("x-axis label should be descriptive and human readable", { 
+ expect_false(mean_flow_ci_plot$labels$x == toString(rlang::get_expr(properties$x))) + }) + + test_that("Plot should have a title", { + expect_true("title" %in% names(mean_flow_ci_plot$labels)) + }) + + print("Success!") +} + +# + +# Question 2.1 + +test_2.1 <- function() { + test_that('Did not assign answer to an object called "adelie_chinstrap_flipper"', { + expect_true(exists("adelie_chinstrap_flipper")) + }) + + test_that("Solution should be a data frame", { + expect_true("data.frame" %in% class(adelie_chinstrap_flipper)) + }) + + expected_colnames <- c("species", "flipper_length_mm") + given_colnames <- colnames(adelie_chinstrap_flipper) + test_that("Data frame does not have the correct columns", { + expect_equal(length(setdiff( + union(expected_colnames, given_colnames), + intersect(expected_colnames, given_colnames) + )), 0) + }) + + test_that("Data frame does not contain the correct number of rows", { + expect_equal(digest(as.integer(nrow(adelie_chinstrap_flipper))), "7a508917c5a0cf5111df42bb714e32a8") + }) + + test_that("Data frame does not contain the correct data", { + expect_equal(digest(as.integer(sum(adelie_chinstrap_flipper$flipper_length_mm))), "a1d0138b85cde684c02ba8726d80d9e1") + }) + + print("Success!") +} + +# + +# Question 2.4 + +test_2.4 <- function() { + test_that('Did not assign answer to an object called "penguin_count"', { + expect_true(exists("penguin_count")) + }) + + test_that("Solution should be a data frame", { + expect_true("data.frame" %in% class(penguin_count)) + }) + + expected_colnames <- c("species", "n") + given_colnames <- colnames(penguin_count) + test_that("Data frame does not have the correct columns", { + expect_equal(length(setdiff( + union(expected_colnames, given_colnames), + intersect(expected_colnames, given_colnames) + )), 0) + }) + + test_that("Data frame does not contain the correct number of rows", { + expect_equal(digest(as.integer(nrow(penguin_count))), "c01f179e4b57ab8bd9de309e6d576c48") + }) + + 
test_that("Data frame does not contain the correct data", { + expect_equal(digest(as.integer(sum(penguin_count$n))), "7a508917c5a0cf5111df42bb714e32a8") + }) + + print("Success!") +} + +# + +# Question 2.5 + +test_2.5 <- function() { + test_that('Did not assign answer to an object called "obs_diff_in_medians"', { + expect_true(exists("obs_diff_in_medians")) + }) + + test_that("Solution should be a data frame", { + expect_true("data.frame" %in% class(obs_diff_in_medians)) + }) + + expected_colnames <- c("stat") + given_colnames <- colnames(obs_diff_in_medians) + test_that("Data frame does not have the correct columns", { + expect_equal(length(setdiff( + union(expected_colnames, given_colnames), + intersect(expected_colnames, given_colnames) + )), 0) + }) + + test_that("Data frame does not contain the correct number of rows", { + expect_equal(digest(as.integer(nrow(obs_diff_in_medians))), "4b5630ee914e848e8d07221556b0a2fb") + }) + + test_that("Data frame does not contain the correct data", { + expect_equal(digest(as.integer(sum(obs_diff_in_medians$stat))), "a9a8883dac7a645a24f4ed180029e4a9") + }) + + print("Success!") +} +# - + +# Question 2.6 +test_2.6 <- function() { + test_that('Did not assign answer to an object called "null_diff_in_medians"', { + expect_true(exists("null_diff_in_medians")) + }) + + test_that("Solution should be a data frame", { + expect_true("data.frame" %in% class(null_diff_in_medians)) + }) + + expected_colnames <- c("replicate", "stat") + given_colnames <- colnames(null_diff_in_medians) + test_that("Data frame does not have the correct columns", { + expect_equal(length(setdiff( + union(expected_colnames, given_colnames), + intersect(expected_colnames, given_colnames) + )), 0) + }) + + test_that("Data frame does not contain the correct number of rows", { + expect_equal(digest(as.integer(nrow(null_diff_in_medians))), "b6a6227038bf9be67533a45a6511cc7e") + }) + + test_that("Data frame does not contain the correct data", { + 
expect_equal(digest(as.integer(sum(null_diff_in_medians$stat))), "0b9349e1f2980e47f00ec4dfd6b4e17c") + }) + + print("Success!") +} + +# + +# Question 2.7 + +test_2.7 <- function() { + test_that('Did not assign answer to an object called "diff_in_medians_plot"', { + expect_true(exists("diff_in_medians_plot")) + }) + + test_that("Solution should be a ggplot object", { + expect_true(is.ggplot(diff_in_medians_plot)) + }) + + properties <- c(diff_in_medians_plot$layers[[1]]$mapping, diff_in_medians_plot$mapping) + + test_that("Plot should have stat on the x-axis", { + expect_true("stat" == rlang::get_expr(properties$x)) + }) + + test_that("Plot does not have the correct layers", { + expect_true("GeomBar" %in% class(diff_in_medians_plot$layers[[1]]$geom)) + + # Remove if not needed: + expect_true("GeomArea" %in% class(diff_in_medians_plot$layers[[2]]$geom)) + # expect_true("GeomVline" %in% class(steam_ci_plot$layers[[2]]$geom)) + }) + + test_that("Plot does not have the correct bin width", { + expect_equal( + digest(as.integer(mget("stat_params", diff_in_medians_plot$layers[[1]])[["stat_params"]][["bins"]])), + "71db8a6cad03244e6e50f0ad8bc95a65" + ) + }) + + test_that("Plot does not use the correct data", { + expect_equal(digest(nrow(diff_in_medians_plot$data)), "b6a6227038bf9be67533a45a6511cc7e") + expect_equal(digest(round(sum(diff_in_medians_plot$data$stat))), "b1bac09bc62ea76ee70585e114b57cce") + + # If stat is not known: + # expect_equal(digest(round(sum(pull(diff_in_medians_plot$data, rlang::get_expr(properties$x))))), "HASH_HERE") + }) + + test_that("x-axis label should be descriptive and human readable", { + expect_false(diff_in_medians_plot$labels$x == toString(rlang::get_expr(properties$x))) + }) + + test_that("Plot should have a title", { + expect_true("title" %in% names(diff_in_medians_plot$labels)) + }) + + print("Success!") +} + +# + +# Question 2.8 + +test_2.8 <- function() { + test_that('Did not assign answer to an object called "answer2.8"', { + 
expect_true(exists("answer2.8")) + }) + + test_that("Solution should be a data frame", { + expect_true("data.frame" %in% class(answer2.8)) + }) + + expected_colnames <- c("p_value") + given_colnames <- colnames(answer2.8) + test_that("Data frame does not have the correct columns", { + expect_equal(length(setdiff( + union(expected_colnames, given_colnames), + intersect(expected_colnames, given_colnames) + )), 0) + }) + + test_that("Data frame does not contain the correct number of rows", { + expect_equal(digest(as.integer(nrow(answer2.8))), "4b5630ee914e848e8d07221556b0a2fb") + }) + + test_that("Data frame does not contain the correct data", { + expect_equal(digest(as.integer(sum(answer2.8$p_value) * 10e6)), "1473d70e5646a26de3c52aa1abd85b1f") + }) + + print("Success!") +} + +# + +# Question 2.9 + +test_2.9 <- function() { + test_that('Did not assign answer to an object called "answer2.9"', { + expect_true(exists("answer2.9")) + }) + + test_that('Solution should be a single character ("A", "B", "C", or "D")', { + expect_match(answer2.9, "a|b|c|d", ignore.case = TRUE) + }) + + answer_hash <- digest(tolower(answer2.9)) + test_that("Solution is incorrect", { + expect_equal(answer_hash, "6e7a8c1c098e8817e3df3fd1b21149d1") + }) + + print("Success!") +} + +# + +# Question 2.11 + +test_2.11 <- function() { + test_that('Did not assign answer to an object called "diff_in_medians_bootstrap_dist"', { + expect_true(exists("diff_in_medians_bootstrap_dist")) + }) + + test_that("Solution should be a data frame", { + expect_true("data.frame" %in% class(diff_in_medians_bootstrap_dist)) + }) + + expected_colnames <- c("replicate", "stat") + given_colnames <- colnames(diff_in_medians_bootstrap_dist) + test_that("Data frame does not have the correct columns", { + expect_equal(length(setdiff( + union(expected_colnames, given_colnames), + intersect(expected_colnames, given_colnames) + )), 0) + }) + + test_that("Data frame does not contain the correct number of rows", { + 
expect_equal(digest(as.integer(nrow(diff_in_medians_bootstrap_dist))), "b6a6227038bf9be67533a45a6511cc7e") + }) + + test_that("Data frame does not contain the correct data", { + expect_equal(digest(as.integer(sum(diff_in_medians_bootstrap_dist$stat))), "dd0dbc05e22c857be679d297cfd1d839") + }) + + print("Success!") +} + +# + +# Question 2.13 + +test_2.13 <- function() { + test_that('Did not assign answer to an object called "diff_in_medians_ci_plot"', { + expect_true(exists("diff_in_medians_ci_plot")) + }) + + test_that("Solution should be a ggplot object", { + expect_true(is.ggplot(diff_in_medians_ci_plot)) + }) + + properties <- c(diff_in_medians_ci_plot$layers[[1]]$mapping, diff_in_medians_ci_plot$mapping) + + test_that("Plot should have stat on the x-axis", { + expect_true("stat" == rlang::get_expr(properties$x)) + }) + + test_that("Plot does not have the correct layers", { + expect_true("GeomBar" %in% class(diff_in_medians_ci_plot$layers[[1]]$geom)) + + # Remove if not needed: + expect_true("GeomRect" %in% class(diff_in_medians_ci_plot$layers[[2]]$geom)) + # expect_true("GeomVline" %in% class(steam_ci_plot$layers[[2]]$geom)) + }) + + + test_that("Plot does not use the correct data", { + expect_equal(digest(nrow(diff_in_medians_ci_plot$data)), "b6a6227038bf9be67533a45a6511cc7e") + expect_equal(digest(round(sum(diff_in_medians_ci_plot$data$stat))), "11a1e979f75e9a284b6dfaf559a4ef06") + + # If stat is not known: + # expect_equal(digest(round(sum(pull(diff_in_medians_ci_plot$data, rlang::get_expr(properties$x))))), "HASH_HERE") + }) + + test_that("x-axis label should be descriptive and human readable", { + expect_false(diff_in_medians_ci_plot$labels$x == toString(rlang::get_expr(properties$x))) + }) + + test_that("Plot should have a title", { + expect_true("title" %in% names(diff_in_medians_ci_plot$labels)) + }) + + print("Success!") +} + +# + +# Question 3.1 + +test_3.1 <- function() { + test_that('Did not assign answer to an object called "obs_diff_prop"', { + 
expect_true(exists("obs_diff_prop")) + }) + + test_that("Solution should be a data frame", { + expect_true("data.frame" %in% class(obs_diff_prop)) + }) + + expected_colnames <- c("stat") + given_colnames <- colnames(obs_diff_prop) + test_that("Data frame does not have the correct columns", { + expect_equal(length(setdiff( + union(expected_colnames, given_colnames), + intersect(expected_colnames, given_colnames) + )), 0) + }) + + test_that("Data frame does not contain the correct number of rows", { + expect_equal(digest(as.integer(nrow(obs_diff_prop))), "4b5630ee914e848e8d07221556b0a2fb") + }) + + test_that("Data frame does not contain the correct data", { + expect_equal(digest(as.integer(sum(obs_diff_prop$stat))), "1473d70e5646a26de3c52aa1abd85b1f") + }) + + print("Success!") +} + +# + +# Question 3.2 + +test_3.2 <- function() { + test_that('Did not assign answer to an object called "irradiat_null_distribution"', { + expect_true(exists("irradiat_null_distribution")) + }) + + test_that("Solution should be a data frame", { + expect_true("data.frame" %in% class(irradiat_null_distribution)) + }) + + expected_colnames <- c("replicate", "stat") + given_colnames <- colnames(irradiat_null_distribution) + test_that("Data frame does not have the correct columns", { + expect_equal(length(setdiff( + union(expected_colnames, given_colnames), + intersect(expected_colnames, given_colnames) + )), 0) + }) + + test_that("Data frame does not contain the correct number of rows", { + expect_equal(digest(as.integer(nrow(irradiat_null_distribution))), "b6a6227038bf9be67533a45a6511cc7e") + }) + + test_that("Data frame does not contain the correct data", { + expect_equal(digest(as.integer(sum(irradiat_null_distribution$stat) * 10e4)), "29dd644dbe7c349dfa5987eab61afa99") + }) + + print("Success!") +} + +# + +# Question 3.3 + +test_3.3 <- function() { + test_that('Did not assign answer to an object called "irradiate_result_plot"', { + expect_true(exists("irradiate_result_plot")) + }) + + 
test_that("Solution should be a ggplot object", { + expect_true(is.ggplot(irradiate_result_plot)) + }) + + properties <- c(irradiate_result_plot$layers[[1]]$mapping, irradiate_result_plot$mapping) + + test_that("Plot should have stat on the x-axis", { + expect_true("stat" == rlang::get_expr(properties$x)) + }) + + test_that("Plot does not have the correct layers", { + expect_true("GeomBar" %in% class(irradiate_result_plot$layers[[1]]$geom)) + + # Remove if not needed: + expect_true("GeomSegment" %in% class(irradiate_result_plot$layers[[3]]$geom)) + # expect_true("GeomVline" %in% class(steam_ci_plot$layers[[2]]$geom)) + }) + + test_that("Plot does not have the correct bin width", { + expect_equal( + digest(as.integer(mget("stat_params", irradiate_result_plot$layers[[1]])[["stat_params"]][["bins"]])), + "71db8a6cad03244e6e50f0ad8bc95a65" + ) + }) + + test_that("Plot does not use the correct data", { + expect_equal(digest(nrow(irradiate_result_plot$data)), "b6a6227038bf9be67533a45a6511cc7e") + expect_equal(digest(round(sum(irradiate_result_plot$data$stat))), "cb6bf34f4fa5893cf9fe8286e81bd32d") + + # If stat is not known: + # expect_equal(digest(round(sum(pull(irradiate_result_plot$data, rlang::get_expr(properties$x))))), "HASH_HERE") + }) + + test_that("x-axis label should be descriptive and human readable", { + expect_false(irradiate_result_plot$labels$x == toString(rlang::get_expr(properties$x))) + }) + + test_that("Plot should have a title", { + expect_true("title" %in% names(irradiate_result_plot$labels)) + }) + + print("Success!") +} + diff --git a/materials/tutorial_06/tutorial_06.ipynb b/materials/tutorial_06/tutorial_06.ipynb new file mode 100644 index 0000000..0cea037 --- /dev/null +++ b/materials/tutorial_06/tutorial_06.ipynb @@ -0,0 +1,2848 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "b1fa3206079bf39210b976ad2776502f", + "grade": false, 
+ "grade_id": "cell-57c92078710d7670", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "# Tutorial 6: Hypothesis Testing\n", + "\n", + "### Lecture and Tutorial Learning Goals\n", + "From this section, students are expected to be able to:\n", + "\n", + "1. Give an example of a question you could answer with a hypothesis test.\n", + "2. Identify potential limitations in the data, arising from the methods of data collection, to answer the question.\n", + "3. Specify a null and alternative hypothesis.\n", + "4. Given an inferential question, formulate hypotheses to be used in a hypothesis test.\n", + "5. Identify the correct steps and components of a basic hypothesis test.\n", + "6. Write computer scripts to perform hypothesis testing via simulation, randomization and bootstrapping approaches, as well as interpret the output.\n", + "7. Identify the advantages of simulation/randomization tests when estimating parameters different from proportions and means.\n", + "8. Describe the relationship between confidence intervals and hypothesis testing.\n", + "9. Discuss the potential limitations of these methods." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "7e43a30efef2360252cba7ac4d5c01a0", + "grade": false, + "grade_id": "cell-fc07df746fa2acae", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Run this cell before continuing.\n", + "library(cowplot)\n", + "library(datateachr)\n", + "library(digest)\n", + "library(infer)\n", + "library(repr)\n", + "library(taxyvr)\n", + "library(tidyverse)\n", + "library(dplyr)\n", + "library(datateachr)\n", + "penguins <- read.csv(\"https://raw.githubusercontent.com/allisonhorst/palmerpenguins/master/inst/extdata/penguins.csv\")\n", + "source(\"tests_tutorial_06.R\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "88060d8ac496cc55a192a70838f7da6e", + "grade": false, + "grade_id": "cell-5452f0b8e3cc9640", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "## 1. Annual Maximum Flow Rate of Bow River\n", + "\n", + "  When the snow melts in spring and summer, tons of water are released into the rivers, and flooding occurs. One preventative measure is to keep track of the maximum flow of a river each year. For this question, we aim to prevent flooding by first studying the annual maximum daily discharge (in $m^3/s$) at a hydrometric station called Bow River at Banff, which is near Banff, Alberta. The data are saved to the data table `flow_sample`. Let's preview this dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "530b6198d5db0ee7c236808939074faa", + "grade": false, + "grade_id": "cell-00f0bba26d76ed06", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "?flow_sample" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "e58a73aba2024881beb58c72138344e7", + "grade": false, + "grade_id": "cell-5c3c13289a0b767a", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "head(flow_sample)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "0ee198293c06467eb0bca1cf7e7ccb35", + "grade": false, + "grade_id": "cell-1d4382bf89c3623d", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "A village downstream wants to build a dam to mitigate the effects of annual flooding. To design this dam, we’re interested in studying the distribution of the maximum flow of Bow River at this station. A retired employee, who was monitoring many hydrometric stations in the area, claims that the annual maximum flow is typically around $210 m^3/s$. However, residents in the area claim that the annual maximum flow is typically higher than $210 m^3/s$." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "977555584e553e2b145f78d65ac516a7", + "grade": false, + "grade_id": "cell-eb547880fb23e1e7", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Question 1.1: Selecting Parameter
\n", + "{points: 2}\n", + "\n", + "Which of the parameters below would be most suitable to investigate and ultimately test the residents’ claim? (Select all that apply)\n", + "\n", + "A. The mean of the annual maximum flow distribution at Bow River\n", + "\n", + "B. The median of the annual maximum flow distribution at Bow River\n", + "\n", + "C. The variance of the annual maximum flow distribution at Bow River\n", + "\n", + "D. The proportion of annual maximum flow values at Bow River exceeding the residents’ claim\n", + "\n", + "_Assign your answer to an object called `answer1.1`. Your answer should be a sequence of characters surrounded by quotes (e.g., \"ABCD\")._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "92b44dd65494d8e0c41e09f02c5f5d46", + "grade": false, + "grade_id": "cell-59d0d769a179b776", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer1.1 <- \"\"\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer1.1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "01e425eee5a50df4575342548871608a", + "grade": true, + "grade_id": "cell-5b21334104d55645", + "locked": true, + "points": 2, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Here we check to see if you have given your answer the correct object name\n", + "# and if your answer is plausible. 
However, all other tests have been hidden\n", + "# so you can practice deciding when you have the correct answer.\n", + "\n", + "test_that('Did not assign answer to an object called \"answer1.1\"', {\n", + " expect_true(exists(\"answer1.1\"))\n", + " })\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "46bf9b41371063ea1361a3e7593184fd", + "grade": false, + "grade_id": "cell-f8c1adc7133c799b", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "  For now, let us focus on the mean of the annual maximum flow. We want to test hypotheses about the mean at the 5% significance level. Here we assume that the annual maximum flow data originate from a distribution that does not change over the years (due to climate change, tectonic activities, etc)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "59a4591d4eee64ea4bdb9e62bca836e9", + "grade": false, + "grade_id": "cell-d04e69597c43ec20", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Question 1.2: Null Hypothesis
\n", + "{points: 2}\n", + "\n", + "Which of the following would be an appropriate null hypothesis for us to set, given the residents’ and retired employee’s claims?\n", + "\n", + "A. $H_0$: The mean of the annual maximum flow at Bow River is equal to $210 m^3/s$.\n", + "\n", + "B. $H_0$: The mean of the annual maximum flow at Bow River is greater than $210 m^3/s$.\n", + "\n", + "C. $H_0$: The mean of the annual maximum flow at Bow River is greater than or equal to $210 m^3/s$.\n", + "\n", + "D. $H_0$: The mean of the annual maximum flow at Bow River is NOT equal to $210 m^3/s$.\n", + "\n", + "Your answer should be a string containing one letter.\n", + "\n", + "_Assign your answer to an object called `answer1.2`. Your answer should be a single character surrounded by quotes._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "51cf42e331910c1553435fcde20e2e94", + "grade": false, + "grade_id": "cell-bbdcf0abb3076bb4", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer1.2 <-\"\"\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer1.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "560b397b6882cdd316a23ce897fb9f3b", + "grade": true, + "grade_id": "cell-09943b30d60804fe", + "locked": true, + "points": 2, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Here we check to see if you have given your answer the correct object name\n", + "# and if your answer is plausible. 
However, all other tests have been hidden\n", + "# so you can practice deciding when you have the correct answer.\n", + "\n", + "test_that('Did not assign answer to an object called \"answer1.2\"', {\n", + "expect_true(exists(\"answer1.2\"))\n", + "})\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "eb21c3c737aa8ff6540048e77d1b2ec9", + "grade": false, + "grade_id": "cell-0adb8800e06c8afc", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Question 1.3: Alternative Hypothesis
\n", + "{points: 2}\n", + "\n", + "Which of the following would be an appropriate alternative hypothesis for us to set, given the residents’ and retired employee’s claims?\n", + "\n", + "A. $H_1$: The mean of the annual maximum flow at Bow River is less than $210 m^3/s$.\n", + "\n", + "B. $H_1$: The mean of the annual maximum flow at Bow River is greater than $210 m^3/s$.\n", + "\n", + "C. $H_1$: The mean of the annual maximum flow at Bow River is greater than or equal to $210 m^3/s$.\n", + "\n", + "D. $H_1$: The mean of the annual maximum flow at Bow River is NOT equal to $210 m^3/s$.\n", + "\n", + "Your answer should be a string containing one letter.\n", + "\n", + "_Assign your answer to an object called `answer1.3`. Your answer should be a single character surrounded by quotes._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "ab84a13e7b643e488fbc2816c7c48916", + "grade": false, + "grade_id": "cell-276a5cceb1bb36f8", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer1.3 <-\"\"\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer1.3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "18be84f15c3a1b09b3fa9f576bc69856", + "grade": true, + "grade_id": "cell-54163d50760ddff3", + "locked": true, + "points": 2, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Here we check to see if you have given your answer the correct object name\n", + "# and if your answer is plausible. 
However, all other tests have been hidden\n", + "# so you can practice deciding when you have the correct answer.\n", + "\n", + "\n", + "test_that('Did not assign answer to an object called \"answer1.3\"', {\n", + " expect_true(exists(\"answer1.3\"))\n", + "})\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "3dbbb38f885b9b8c96e42ab437881245", + "grade": false, + "grade_id": "cell-e29939b4e2828732", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "  Now we select the maximum flow, keep only the year and the flow columns. We also find the sample size." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "25e6bf52fd4f0902ae496c057b89a025", + "grade": false, + "grade_id": "cell-97d809446f479cf4", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Run this code before continuing\n", + "max_flow_sample <- \n", + " flow_sample %>%\n", + " filter(extreme_type == 'maximum') %>%\n", + " select(year, flow) %>% \n", + " rename(maximum_flow = flow)\n", + "\n", + "head(max_flow_sample)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "6d608e43d2a606526b9733fcee959840", + "grade": false, + "grade_id": "cell-67097464296d3c5f", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + " Question 1.4
\n", + "{points: 3}\n", + "\n", + "Calculate the observed test statistic from `max_flow_sample` with the `infer` package, specify the response, and use the `calculate` function. Leave your answer as a 1x1 tibble with a column named `stat`.\n", + "\n", + "_Assign your data frame to an object called `observed_mean`. Your data frame should have only one column, `stat`, and one row._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "251af4465bbcf92741da6eb2cd9e5acb", + "grade": false, + "grade_id": "cell-d9f83fc59d078643", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#observed_mean <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "observed_mean " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "3ce6d9931580e093593af9baa5a1c15c", + "grade": true, + "grade_id": "cell-c0e444ce1fc5f664", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Here we check to see if you have given your answer the correct object name\n", + "# and if your answer is plausible. 
However, all other tests have been hidden\n", + "# so you can practice deciding when you have the correct answer.\n", + "\n", + "test_that('Did not assign answer to an object called \"observed_mean\"', {\n", + " expect_true(exists(\"observed_mean\"))\n", + "})\n", + "\n", + "test_that(\"Solution should be a data frame\", {\n", + " expect_true(\"data.frame\" %in% class(observed_mean))\n", + "})\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "58a5b30e9555ab30deafe2537962c09d", + "grade": false, + "grade_id": "cell-4d47bff6731fa8a8", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Question 1.5: Simulating from the null distribution
\n", + "{points: 3}\n", + "\n", + "Using the `infer` workflow, generate 1000 samples from the null distribution. Remember the steps:\n", + "\n", + "1. `specify` the response;\n", + "2. `hypothesize`;\n", + "3. `generate` 1000 samples; \n", + "4. and `calculate` the mean of each sample. \n", + "\n", + "_Assign your data frame to an object called `null_max_flow`. Your data frame should have two columns: `replicate` and `stat`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "e2e51ae1c6f00e074f7448ab9971f5e4", + "grade": false, + "grade_id": "cell-3c6d6c2eb789d852", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "set.seed(1432) # Do not change this\n", + "\n", + "#null_max_flow <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "head(null_max_flow)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "3c8813fe4cdd88c8c069c6747bbe5ecd", + "grade": true, + "grade_id": "cell-81b290784b27c6d1", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Here we check to see if you have given your answer the correct object name\n", + "# and if your answer is plausible. 
However, all other tests have been hidden\n", + "# so you can practice deciding when you have the correct answer.\n", + "\n", + "\n", + "test_that('Did not assign answer to an object called \"null_max_flow\"', {\n", + " expect_true(exists(\"null_max_flow\"))\n", + " })\n", + "\n", + " test_that(\"Solution should be a data frame\", {\n", + " expect_true(\"data.frame\" %in% class(null_max_flow))\n", + " })\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "8da7605ed3b14286340909019f99bdff", + "grade": false, + "grade_id": "cell-145ec8e33ab901cc", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Question 1.6
\n", + "{points: 3}\n", + "\n", + "Plot the result of the hypothesis test using `visualize` with 10 bins, put a vertical bar for the observed test statistic, and shade the tail(s). Label the x-axis as `Mean`.\n", + "\n", + "```r\n", + "max_flow_result_plot <- \n", + " null_max_flow %>% \n", + " visualize(bins = ...) + \n", + " shade_p_value(obs_stat = ..., direction = ...) +\n", + " xlab(...)\n", + "```\n", + "\n", + "Assign your answer to an object called `max_flow_result_plot`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "aceafcf818632275a02379a43d1f4f92", + "grade": false, + "grade_id": "cell-2999b00535bb274a", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#max_flow_result_plot <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "max_flow_result_plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "08c8ed60e6153c25d561e1600fafe764", + "grade": true, + "grade_id": "cell-0f59fcb4d9912b65", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_1.6()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "bde4a2296d4709dfead8d499d727fbd4", + "grade": false, + "grade_id": "cell-d24a21794f869182", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Question 1.7
\n", + "{points: 3}\n", + "\n", + "Use the `get_p_value` function from the `infer` package to get the p-value from `null_max_flow`. \n", + "\n", + "```r\n", + "answer1.7 <- \n", + " ... %>% \n", + " get_p_value(obs_stat = ..., direction = ...)\n", + "```\n", + "Assign your answer to an object called `answer1.7`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "b59e6e81df7d1f2acab00de082cb4026", + "grade": false, + "grade_id": "cell-cdd5fa2b8770bc9f", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer1.7 <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer1.7" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "24fe8e8e72799ade4c1a195c5a7eb9cd", + "grade": true, + "grade_id": "cell-ec34f3b254715300", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_1.7()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "d392d3731191bc2bc3ae97b8d5233a1e", + "grade": false, + "grade_id": "cell-726b2577fe97afcd", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Question 1.8: Conclusion of the test
\n", + "{points: 3}\n", + "\n", + "What can we conclude based on the result of the hypothesis test?\n", + "\n", + "A. Given a p-value of 0.369 we do not reject the null hypothesis.\n", + "\n", + "B. Given a p-value of 0.369 we reject the null hypothesis.\n", + "\n", + "C. Given a p-value of 0.369 we do not reject the null hypothesis at the 5% significance level.\n", + "\n", + "D. Given a p-value of 0.369 we reject the null hypothesis at the 5% significance level.\n", + "\n", + "_Assign your answer to an object called `answer1.8`. Your response should be a single character surrounded by quotes._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "8f38e7357bf6e59eb460c3a7228978b7", + "grade": false, + "grade_id": "cell-cd83d388742b5c1e", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer1.8 <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer1.8" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "b297e4aed32afb5c864dbe1c9abff0c4", + "grade": true, + "grade_id": "cell-c01da46960066eff", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Here we check to see if you have given your answer the correct object name\n", + "# and if your answer is plausible. 
However, all other tests have been hidden\n", + "# so you can practice deciding when you have the correct answer.\n", + "\n", + "test_that('Did not assign answer to an object called \"answer1.8\"', {\n", + " expect_true(exists(\"answer1.8\"))\n", + "})\n", + "\n", + "test_that('Solution should be a single character (\"A\", \"B\", \"C\", or \"D\")', {\n", + " expect_match(answer1.8, \"a|b|c|d\", ignore.case = TRUE)\n", + "})\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "722f8cbdaa93dff0cc63824cfdc7bc6d", + "grade": false, + "grade_id": "cell-015747aad9f99ed7", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Question 1.9: Conclusion at a different significance level\n", + "\n", + "{Points: 3}\n", + "\n", + "If we conducted the test at the 10% significance level instead, would our conclusion have been different?\n", + "\n", + "A. Yes, it would have, the null hypothesis would be rejected.\n", + "\n", + "B. Yes, it would have, the null hypothesis would be accepted.\n", + "\n", + "C. Yes, it would have, the null hypothesis would NOT be rejected.\n", + "\n", + "D. No, it wouldn’t.\n", + "\n", + "Your answer should be a string containing one letter.\n", + "\n", + "_Assign your answer to an object called `answer1.9`. 
Your answer should be a single character surrounded by quotes._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "1011d3b67e17f1bc7e42ee4556e82a45", + "grade": false, + "grade_id": "cell-ff07ff67d5a1ed83", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer1.9 <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer1.9" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "a8990b7c5992a94cca6f30e1fc01b167", + "grade": true, + "grade_id": "cell-9f2fd9fe55309574", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_1.9()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "abfaaf30075ba916c1d297257f433706", + "grade": false, + "grade_id": "cell-ffa16ff57f4b5a8d", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + " Question 1.10
\n", + "{points: 3}\n", + "\n", + "Now we would like to find the 90% confidence interval for the mean. First, let's find the bootstrap distribution for the mean by generating 1000 samples. Use the `infer` package and `max_flow_sample` to specify the response, generate 1000 samples, and calculate the mean. \n", + "\n", + "\n", + "_Assign your data frame to an object called `mean_max_bootstrap_dist`. Your data frame should have two columns: `replicate` and `stat`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "326aa36e85e7c49f49eaccda0ef4cc9d", + "grade": false, + "grade_id": "cell-64f8e7f7f47c3a85", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "set.seed(6882) # Do not change this\n", + "\n", + "#mean_max_bootstrap_dist <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "head(mean_max_bootstrap_dist)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "bb0458262ae0144211f45388e16870cc", + "grade": true, + "grade_id": "cell-5f0b57fbbf90a7d5", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_1.10()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "a0e7bd2761e5bff702c507a92927f8d7", + "grade": false, + "grade_id": "cell-d56135d0ece3219e", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + " Question 1.11
\n", + "{points: 2}\n", + "\n", + "Using the bootstrap distribution `mean_max_bootstrap_dist`, find the 90% confidence interval using the quantile function (think of which quantiles will be suitable for a 90% CI).\n", + "\n", + "```r\n", + "mean_max_flow_ci <- \n", + " ... %>% \n", + " summarise(lower_ci = ..., upper_ci = ...)\n", + "```\n", + "\n", + "_Assign your data frame to an object called `mean_max_flow_ci`. Your data frame should have two columns: `lower_ci` and `upper_ci`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "301d05c8193cf892ff977ef54e66dd1b", + "grade": false, + "grade_id": "cell-a7989458209c905e", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# mean_max_flow_ci <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "mean_max_flow_ci" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "539e64073c701e4b387fce5279f743cf", + "grade": true, + "grade_id": "cell-c238fc99168e92b5", + "locked": true, + "points": 2, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_1.11()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "d8aca906fc0413801d5daf70564dd2fd", + "grade": false, + "grade_id": "cell-d5cf433a7e810709", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + " Question 1.12
\n", + "{points: 2}\n", + "\n", + "Using the `infer` package, visualize the confidence interval `mean_max_flow_ci` with the bootstrap distribution `mean_max_bootstrap_dist`.\n", + "\n", + "Assign your plot to an object called `mean_flow_ci_plot`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "3190b252dca4827532ac278c79bfefe9", + "grade": false, + "grade_id": "cell-5d7f7b1bc05b579e", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# mean_flow_ci_plot <- \n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "mean_flow_ci_plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "c870e61e40d6ab3fb3b3bce69be56983", + "grade": true, + "grade_id": "cell-4e8a5c776697d937", + "locked": true, + "points": 2, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_1.12()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "6b126835e221531953193f262c09af24", + "grade": false, + "grade_id": "cell-43d4cea695e5b6f7", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "## 2. Flipper Lengths of Penguins\n", + "\n", + "The dataset `penguins` contains size measurements for adult foraging penguins near Palmer Station, Antarctica. First, let's take a look at the first few rows of this dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "3976cf2c5691b63ffa6ec590d18f08b7", + "grade": false, + "grade_id": "cell-9434eda90796aebe", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "head(penguins)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "56175989d491cc29655a70a497b6c2c5", + "grade": false, + "grade_id": "cell-827c6561abf44dc5", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "  We want to study how Adelie and Chinstrap penguins are different. First, we study their flipper lengths (in mm)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "4cca2e07fac483638e9eb33d621e5865", + "grade": false, + "grade_id": "cell-245a0ab4a2611869", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + " Question 2.1: Pre-processing
\n", + "{points: 2}\n", + "\n", + "Filter the `penguins` dataset to remove all rows with `NA` in `flipper_length_mm`, keep only the `Adelie` and `Chinstrap` species, and select the two columns `species` and `flipper_length_mm`.\n", + "\n", + "_Assign your data frame to an object called `adelie_chinstrap_flipper`. Your data frame should have only two columns, `species` and `flipper_length_mm`._ " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "dcaa12d43fdc1b2d239bacdf20e30d81", + "grade": false, + "grade_id": "cell-4befd1c3dc62017f", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#adelie_chinstrap_flipper <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "head(adelie_chinstrap_flipper)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "62f4004efe9827aaf5e4e1271d87d62d", + "grade": true, + "grade_id": "cell-8e8fea9ac0fa10b3", + "locked": true, + "points": 2, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.1()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "8b4c8ba3eb0c028e2fbd87fe603f9ae6", + "grade": false, + "grade_id": "cell-13b20a70bc1ac634", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Question 2.2: Null hypothesis
\n", + "{points: 2}\n", + "\n", + "  An ecologist suspects that flipper lengths affect their ability to swim. But are the flipper lengths different between the species? Looking at photos of the two penguin species, some claim that their flippers are generally the same length. However, an ecologist hypothesizes that they may not be the same length. To study the distributions of the flipper lengths of the two species, let's conduct a hypothesis test to examine their difference in medians.\n", + "\n", + "Which of the following would be an appropriate null hypothesis for us to set, given the above situation?\n", + "\n", + "A. $H_0$: The median flipper length of the Adelie penguins is the same as the median flipper length of the Chinstrap penguins.\n", + "\n", + "B. $H_0$: The mean flipper length of the Adelie penguins is the same as the mean flipper length of the Chinstrap penguins.\n", + "\n", + "C. $H_0$: The median flipper length of the Adelie penguins is different from the median flipper length of the Chinstrap penguins.\n", + "\n", + "D. $H_0$: The median flipper length of the Adelie penguins is greater than the median flipper length of the Chinstrap penguins.\n", + "\n", + "Your answer should be a string containing one letter.\n", + "\n", + "_Assign your answer to an object called `answer2.2`. 
Your answer should be a single character surrounded by quotes._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "9c9c151b2500264dcb61cb8fcb01c26a", + "grade": false, + "grade_id": "cell-d2bf2e8a134f2895", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer2.2 <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer2.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "30acad59ad0bf964ec75a0b741c380f9", + "grade": true, + "grade_id": "cell-101813e2ab74aecd", + "locked": true, + "points": 2, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Here we check to see if you have given your answer the correct object name\n", + "# and if your answer is plausible. However, all other tests have been hidden\n", + "# so you can practice deciding when you have the correct answer.\n", + "\n", + "test_that('Did not assign answer to an object called \"answer2.2\"', {\n", + " expect_true(exists(\"answer2.2\"))\n", + "})\n", + "\n", + "test_that('Solution should be a single character (\"A\", \"B\", \"C\", or \"D\")', {\n", + " expect_match(answer2.2, \"a|b|c|d\", ignore.case = TRUE)\n", + "})\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "57e70901acadb444bc293b0d74647078", + "grade": false, + "grade_id": "cell-bb806d6682525df9", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Question 2.3: Alternative Hypothesis
\n", + "{points: 2}\n", + "\n", + "Which of the following would be an appropriate alternative hypothesis for us to set, given the above situation?\n", + "\n", + "A. $H_1$: The median flipper length of the Adelie penguins is the same as the median flipper length of the Chinstrap penguins.\n", + "\n", + "B. $H_1$: The mean flipper length of the Adelie penguins is different from the mean flipper length of the Chinstrap penguins.\n", + "\n", + "C. $H_1$: The median flipper length of the Adelie penguins is different from the median flipper length of the Chinstrap penguins.\n", + "\n", + "D. $H_1$: The median flipper length of the Adelie penguins is less than the median flipper length of the Chinstrap penguins.\n", + "\n", + "Your answer should be a string containing one letter.\n", + "\n", + "_Assign your answer to an object called `answer2.3`. Your answer should be a single character surrounded by quotes._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "e261003998b7e3394435ce03d6c5c600", + "grade": false, + "grade_id": "cell-d05d1675c44c9eb6", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer2.3 <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer2.3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "b59b80b2e0857c646645ccf56ee768af", + "grade": true, + "grade_id": "cell-550bdafdc0a5c4c6", + "locked": true, + "points": 2, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Here we check to see if you have given your answer the correct object name\n", + "# and if your answer is plausible. 
However, all other tests have been hidden\n", + "# so you can practice deciding when you have the correct answer.\n", + "\n", + "test_that('Did not assign answer to an object called \"answer2.3\"', {\n", + " expect_true(exists(\"answer2.3\"))\n", + "})\n", + "\n", + "test_that('Solution should be a single character (\"A\", \"B\", \"C\", or \"D\")', {\n", + " expect_match(answer2.3, \"a|b|c|d\", ignore.case = TRUE)\n", + "})\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "4c29c22c3eb15b03615853e8e5fe1070", + "grade": false, + "grade_id": "cell-f4da9409ad2b8f49", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + " Question 2.4
\n", + "{points: 2}\n", + "\n", + "Count the numbers of Adelie penguins and Chinstrap penguins examined in `adelie_chinstrap_flipper`.\n", + "\n", + "```r\n", + "penguin_count <-\n", + " ... %>% \n", + " count(...)\n", + "```\n", + "\n", + "_Assign your data frame to an object called `penguin_count`. Your data frame should have only two columns: `species` and `n`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "806ded0de932e3c8dac0e3dc22d72788", + "grade": false, + "grade_id": "cell-743723213f0f9cb1", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# penguin_count <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "penguin_count" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "bc2950245a351fcf6f2acf3aa2701420", + "grade": true, + "grade_id": "cell-541be377c0b58516", + "locked": true, + "points": 2, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.4()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "7965666b24111ef9c1f3e88dc27f28dc", + "grade": false, + "grade_id": "cell-d74ad5c7b9e32c98", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + " Question 2.5
\n", + "{points: 3}\n", + "\n", + "Calculate the observed test statistic with the `infer` package. Use `adelie_chinstrap_flipper` to specify the response and explanatory variables, and calculate Adelie's median minus Chinstrap's median. \n", + "\n", + "_Assign your data frame to an object called `obs_diff_in_medians`. Your data frame should have only one column, `stat`, and one row._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "bceeb78bc6f0ca154975f0b0b97dfae8", + "grade": false, + "grade_id": "cell-8944e82a58ce1446", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#obs_diff_in_medians <- \n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "obs_diff_in_medians" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "bc78f353bf4cf506152c2941d333bfd0", + "grade": true, + "grade_id": "cell-9e72fe57d4e4d98c", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.5()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "47fb8fed6f8dd21e214313ca29f20910", + "grade": false, + "grade_id": "cell-b818e868e059e825", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Question 2.6: Simulating from the null distribution
\n", + "{points: 3}\n", + "\n", + "Using the `infer` package, generate 1000 samples from the null distribution. Use `adelie_chinstrap_flipper` to specify the response and explanatory variables, hypothesize, generate 1000 samples and calculate Adelie's median minus Chinstrap's median.\n", + "\n", + "_Assign your data frame to an object called `null_diff_in_medians`. Your data frame should have only two columns: `replicate` and `stat`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "2ffcf13dfcd0713e4d243b96d9ef2ecd", + "grade": false, + "grade_id": "cell-9a967990242fda67", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "set.seed(5437) # Do not change this\n", + "\n", + "#null_diff_in_medians <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "head(null_diff_in_medians)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "cc743206cc386dd945a3bf5fea328c3a", + "grade": true, + "grade_id": "cell-0c90dd8cb33e5c6b", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.6()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "5bc3535fd4adc34a4bc4c6c6a6dfc27e", + "grade": false, + "grade_id": "cell-a8e5bf9578f7b714", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Question 2.7
\n", + "{points: 3}\n", + "\n", + "Plot the result of the hypothesis test with `visualize` with 10 bins, put a vertical bar for the observed test statistic `obs_diff_in_medians`, and shade the tail(s).\n", + "\n", + "_Assign your plot to an object called `diff_in_medians_plot`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "d23e39af21e50f8cd74075bfb7700b57", + "grade": false, + "grade_id": "cell-88088363709a3021", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#diff_in_medians_plot <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "diff_in_medians_plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "63adea0680ea7d89391acfd16cb3ce9e", + "grade": true, + "grade_id": "cell-722525e3c18b6163", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.7()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "a758671124603b3e24261a638dc183bd", + "grade": false, + "grade_id": "cell-4efbcadd10da44f6", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Question 2.8
\n", + "{points: 3}\n", + "\n", + "Obtain the p-value of `obs_diff_in_medians` from `null_diff_in_medians`. Leave your answer as a $1 \\times 1$ tibble with column name `p_value`.\n", + "\n", + "_Assign your data frame to an object called `answer2.8`. Your data frame should have only one column: `p_value`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "2b12c17996890716e9e4962dfa81dc0b", + "grade": false, + "grade_id": "cell-9cc94e6fc7915e7e", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer2.8 <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer2.8" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "6b6f50e542add65b9608c2654c00f0f6", + "grade": true, + "grade_id": "cell-6ae9456876481e63", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.8()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "9b7acd8848a85509c9e86d28a261f590", + "grade": false, + "grade_id": "cell-79472809acf073c4", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + " Question 2.9
\n", + "{points: 2}\n", + "\n", + "We should never report a p-value of 0 because this suggests that making a Type I error is impossible. But this is too bold of a claim to make.\n", + "\n", + "What would be the best way to report the p-value? Think about the smallest nonzero p-value that could be calculated, given that the null distribution was simulated with 1000 replicates.\n", + "\n", + "A. The p-value is < 0.05\n", + "\n", + "B. The p-value is < 0.01\n", + "\n", + "C. The p-value is < 0.001\n", + "\n", + "D. The p-value is < 0.0001\n", + "\n", + "\n", + "_Assign your answer to an object called `answer2.9`. Your answer should be a string containing one letter._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "6201c3eec45b97846ac64f2fed3c9297", + "grade": false, + "grade_id": "cell-38a0bd20fb422203", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer2.9 <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer2.9" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "c0ab1fc2246fffaa35429e37a97745e3", + "grade": true, + "grade_id": "cell-dca9a139679d58de", + "locked": true, + "points": 2, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.9()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "e26a3f7108a91ca53dce089d56d1fae3", + "grade": false, + "grade_id": "cell-eae8d4303213040d", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + " Question 2.10: Conclusion of the test
\n", + "{points: 3}\n", + "\n", + "What can we conclude based on the result of the hypothesis test?\n", + "\n", + "A. Given a p-value < 0.001 we reject the null hypothesis.\n", + "\n", + "B. Given a p-value < 0.001 we accept the alternative hypothesis at the 5% significance level.\n", + "\n", + "C. Given a p-value < 0.001 we do not reject the null hypothesis at the 5% significance level.\n", + "\n", + "D. Given a p-value < 0.001 we reject the null hypothesis at the 5% significance level.\n", + "\n", + "\n", + "_Assign your answer to an object called `answer2.10`. Your answer should be a string containing one letter._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "22a7faea9ef4cefe6780364be6382576", + "grade": false, + "grade_id": "cell-f0c4498d52c61f7b", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer2.10 <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer2.10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "f44d3244456a0ba9cf120107401901a7", + "grade": true, + "grade_id": "cell-48cf027f333efc2a", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Here we check to see if you have given your answer the correct object name\n", + "# and if your answer is plausible. 
However, all other tests have been hidden\n", + "# so you can practice deciding when you have the correct answer.\n", + "\n", + "test_that('Did not assign answer to an object called \"answer2.10\"', {\n", + " expect_true(exists(\"answer2.10\"))\n", + "})\n", + "\n", + "test_that('Solution should be a single character (\"A\", \"B\", \"C\", or \"D\")', {\n", + " expect_match(answer2.10, \"a|b|c|d\", ignore.case = TRUE)\n", + "})\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "a6eaedfca1a6dce001bb83b71a410b80", + "grade": false, + "grade_id": "cell-b442d7a4c1941e61", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + " Question 2.11
\n", + "{points: 3}\n", + "\n", + "Now we would like to find the 90% confidence interval for the difference in medians. First, let's find the bootstrap distribution for the difference in medians with the `infer` package. Use `adelie_chinstrap_flipper` to specify the response and explanatory variables, generate 1000 samples, and calculate Adelie's median minus Chinstrap's median. \n", + "\n", + "_Assign your data frame to an object called `diff_in_medians_bootstrap_dist`. Your data frame should have only two columns: `replicate` and `stat`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "a3aa770f21085b7c3258c4b047817574", + "grade": false, + "grade_id": "cell-32def599d1f74cff", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "set.seed(9263) # Do not change this\n", + "\n", + "#diff_in_medians_bootstrap_dist <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "head(diff_in_medians_bootstrap_dist)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "6bdfc269f619f2338d01288049b596e1", + "grade": true, + "grade_id": "cell-1f971841b9de346a", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.11()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "df807146b0e9fdbf2a5302d91b94baf5", + "grade": false, + "grade_id": "cell-3841d11be9807d7c", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + " Question 2.12
\n", + "{points: 2}\n", + "\n", + "Use `diff_in_medians_bootstrap_dist` to find the 90% confidence interval.\n", + "\n", + "_Assign your data frame to an object called `diff_in_medians_ci`. Your data frame should have two columns: `lower_ci` and `upper_ci`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "2134b19355737a2e1dce67a19feb48d1", + "grade": false, + "grade_id": "cell-314b02546d1b8263", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#diff_in_medians_ci <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "diff_in_medians_ci" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "425191d45f0ab67578de06fe7f7a0297", + "grade": true, + "grade_id": "cell-02c49394935e92c8", + "locked": true, + "points": 2, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Here we check to see if you have given your answer the correct object name\n", + "# and if your answer is plausible. 
However, all other tests have been hidden\n", + "# so you can practice deciding when you have the correct answer.\n", + "\n", + "test_that('Did not assign answer to an object called \"diff_in_medians_ci\"', {\n", + "expect_true(exists(\"diff_in_medians_ci\"))\n", + "})\n", + "\n", + "test_that(\"Solution should be a data frame\", {\n", + "expect_true(\"data.frame\" %in% class(diff_in_medians_ci))\n", + "})\n", + "\n", + "expected_colnames <- c(\"lower_ci\", \"upper_ci\")\n", + "given_colnames <- colnames(diff_in_medians_ci)\n", + "test_that(\"Data frame does not have the correct columns\", {\n", + " expect_equal(length(setdiff(\n", + " union(expected_colnames, given_colnames),\n", + " intersect(expected_colnames, given_colnames)\n", + " )), 0)\n", + "})\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "d11211a5820ae491ae2867d71a29e33b", + "grade": false, + "grade_id": "cell-aa117608bf62ed0b", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + " Question 2.13
\n", + "{points: 2}\n", + "\n", + "Visualize the confidence interval `diff_in_medians_ci` with the bootstrap distribution `diff_in_medians_bootstrap_dist`.\n", + "\n", + "Assign your plot to an object called `diff_in_medians_ci_plot`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "e1818d45b088980938daa2a7106b4d35", + "grade": false, + "grade_id": "cell-33ccfe26348df47a", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# diff_in_medians_ci_plot <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "diff_in_medians_ci_plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "65918957b8e789763c4ade0d71c58120", + "grade": true, + "grade_id": "cell-a56d2bec8b964479", + "locked": true, + "points": 2, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.13()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "c9bde7c3c315ef9c2003c7a0d51dc7a3", + "grade": false, + "grade_id": "cell-1146dfe6f077838a", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "## 3. Breast Cancer and Radiation Therapy\n", + "\n", + "  For this question, we will use the dataset found at https://archive.ics.uci.edu/ml/datasets/Breast+Cancer. The dataset contains information on 286 breast cancer patients, including variables on tumour size, tumour location, radiation therapy, cancer recurrence, and other basic medical history data. 
Given this dataset, we want to investigate whether there is a significant difference in the proportions of cancer recurrence between patients who were treated with experimental radiation therapy and patients who were not (i.e. received an alternate treatment). We will assume that the patients have been randomized into each of these two treatment groups.\n", + "\n", + "  Let's load this dataset. Note that the \"irradiat\" column indicates whether or not the patient received radiation therapy, while the \"Class\" column indicates whether or not the patient experienced a cancer recurrence event." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "c692b2030ef65c0c4dd7c380ee4797e5", + "grade": false, + "grade_id": "cell-5ce07a14b10cac34", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "breast_cancer <- read.csv(url(\"https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer/breast-cancer.data\"),header=FALSE)\n", + "colnames(breast_cancer) <- c(\"class\", \"age\", \"menopause\", \"tumor-size\", \"inv-nodes\", \"node-caps\", \"deg-malig\", \"breast\", \"breast-quad\", \"irradiat\")\n", + "head(breast_cancer)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "f9c3a63574e0e1ab5a05b857924e34fa", + "grade": false, + "grade_id": "cell-f86991f96a49f4c7", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "recurrence_irradiat <- \n", + " breast_cancer %>%\n", + " select(class, irradiat)\n", + "\n", + "head(recurrence_irradiat)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + 
"checksum": "666e8089d01e11154811fdd038871121", + "grade": false, + "grade_id": "cell-dacb75318f7c5a87", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "  Let's group by `class` and `irradiat` and tally how many samples are in each group." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "cdc25dc2ee67ea05cfa1995a295930e4", + "grade": false, + "grade_id": "cell-4b0dd6b1d19b03b8", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "recurrence_irradiat %>%\n", + " group_by(irradiat, class) %>%\n", + " tally() %>%\n", + " spread(irradiat, n)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "710a59096eab221b57d9d74652bcb5df", + "grade": false, + "grade_id": "cell-b5bfec02b1f8467d", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Question 3.1
\n", + "{points: 3}\n", + "\n", + "Let $p_{1}$ be the proportion of radiation therapy patients (irradiat=true) that subsequently experienced cancer recurrence, and let $p_{2}$ be the proportion of patients who did not receive radiation therapy (irradiat=false) that subsequently experienced cancer recurrence. \n", + "\n", + "We want to test $$H_0: p_{1} = p_{2},$$ and $$H_a: p_{1} \\neq p_{2}.$$\n", + "\n", + "Calculate the observed test statistic $\\hat{p}_1 - \\hat{p}_2$ using `recurrence_irradiat` by first specifying the response and explanatory variables.\n", + "\n", + "_Assign your data frame to an object called `obs_diff_prop`. Your data frame should have only one column, `stat`, and one row._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "a0513ff0c44df51c985fefb16cf4ffe3", + "grade": false, + "grade_id": "cell-50f5046219d6451d", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#obs_diff_prop <- \n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "obs_diff_prop " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "52c6d4b1434417425bfc15ebd3dfd6e2", + "grade": true, + "grade_id": "cell-1149952055648d06", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_3.1()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "bdebfa9d75e7fb0b241940e2ea362d81", + "grade": false, + "grade_id": "cell-8b47ef7e19d98891", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Question 
3.2: Null Distribution
\n", + "{points: 3}\n", + "\n", + "Generate 1000 samples from the null distribution. Use `recurrence_irradiat` to specify the response and explanatory variables, hypothesize, generate 1000 samples and calculate the proportion of irradiated patients having recurrent cancer minus the proportion of non-irradiated patients having recurrent cancer. \n", + "\n", + "_Assign your data frame to an object called `irradiat_null_distribution`. Your data frame should have only two columns: `replicate` and `stat`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "df5a2ade9c4b3829b7470f35f3a4dece", + "grade": false, + "grade_id": "cell-e6801b15b4258c14", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "set.seed(3526)\n", + "#irradiat_null_distribution <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "head(irradiat_null_distribution)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "5ed8fa0eb7bf868775377c0209ddfdb1", + "grade": true, + "grade_id": "cell-3a16b15f5692aca8", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_3.2()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "fa10e62d57f04d37b5f1eb5e1ccf9ce8", + "grade": false, + "grade_id": "cell-fcaaa01f1f5c44a9", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Question 3.3
\n", + "{points: 3}\n", + "\n", + "Plot the result of the hypothesis test using `visualize` with 10 bins, put a vertical bar for the observed test statistic `obs_diff_prop`, and shade the tail(s).\n", + "\n", + "Assign your answer to an object called `irradiate_result_plot`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "73262f6ab1a548b80faac6a5ecb24887", + "grade": false, + "grade_id": "cell-7818997009c7aef4", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#irradiate_result_plot <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "irradiate_result_plot " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "0eb82ca605bc9341f4d48370ab5070bd", + "grade": true, + "grade_id": "cell-c5d0b20f2d897555", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_3.3()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "e746765dd6e03b19efa171a66f267e91", + "grade": false, + "grade_id": "cell-cfc255e700f115c2", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Question 3.4: Calculate p-value
\n", + "{points: 3}\n", + "\n", + "Obtain the p-value from `irradiat_null_distribution`. Leave your answer as a $1 \\times 1$ tibble with column name `p_value`.\n", + "\n", + "Assign your answer to an object called `answer3.4`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "61e82491699cc7bea0c066dbed6d16eb", + "grade": false, + "grade_id": "cell-cdd617571e2e7956", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer3.4<-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer3.4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "ac9c583a088dfe2e62365d918aeb964d", + "grade": true, + "grade_id": "cell-506d7cba4ff83f8d", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Here we check to see if you have given your answer the correct object name\n", + "# and if your answer is plausible. 
However, all other tests have been hidden\n", + "# so you can practice deciding when you have the correct answer.\n", + "\n", + "test_that('Did not assign answer to an object called \"answer3.4\"', {\n", + " expect_true(exists(\"answer3.4\"))\n", + " })\n", + "\n", + " test_that(\"Solution should be a data frame\", {\n", + " expect_true(\"data.frame\" %in% class(answer3.4))\n", + " })\n", + "\n", + "expected_colnames <- c(\"p_value\")\n", + "given_colnames <- colnames(answer3.4)\n", + "test_that(\"Data frame does not have the correct columns\", {\n", + " expect_equal(length(setdiff(\n", + " union(expected_colnames, given_colnames),\n", + " intersect(expected_colnames, given_colnames)\n", + " )), 0)\n", + "})\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "514a984880ac3bdd93ff4918682f41a8", + "grade": false, + "grade_id": "cell-a5c982b6b269f9be", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "  Thus, given the p-value above, we reject the null hypothesis at 5% significance level.\n", + "\n", + "  Given this result and the test statistic that we observed in Question 3.1, there is evidence to suggest that cancer recurrence is associated with the type of treatment received. Specifically, patients who received the experimental radiation therapy may be more likely to experience cancer recurrence than patients who did not. This may be attributable to its lower effectiveness at eliminating the cancer present, compared to alternative treatments." 
+ ] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,Rmd" + }, + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "4.2.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/materials/worksheet_06/data/hcv-data-set.csv b/materials/worksheet_06/data/hcv-data-set.csv new file mode 100644 index 0000000..4516c9e --- /dev/null +++ b/materials/worksheet_06/data/hcv-data-set.csv @@ -0,0 +1,612 @@ +category,age,sex,albumin,alkaline_phosphatase,alanine_aminotransferase," +aspartate_aminotransferase",bilirubin," +cholinesterase"," +cholesterol",creatinine,gamma_glutamyl_transferase,PROT +Blood Donor,32,m,38.5,52.5,7.7,22.1,7.5,6.93,3.23,106,12.1,69 +Blood Donor,32,m,38.5,70.3,18,24.7,3.9,11.17,4.8,74,15.6,76.5 +Blood Donor,32,m,46.9,74.7,36.2,52.6,6.1,8.84,5.2,86,33.2,79.3 +Blood Donor,32,m,43.2,52,30.6,22.6,18.9,7.33,4.74,80,33.8,75.7 +Blood Donor,32,m,39.2,74.1,32.6,24.8,9.6,9.15,4.32,76,29.9,68.7 +Blood Donor,32,m,41.6,43.3,18.5,19.7,12.3,9.92,6.05,111,91,74 +Blood Donor,32,m,46.3,41.3,17.5,17.8,8.5,7.01,4.79,70,16.9,74.5 +Blood Donor,32,m,42.2,41.9,35.8,31.1,16.1,5.82,4.6,109,21.5,67.1 +Blood Donor,32,m,50.9,65.5,23.2,21.2,6.9,8.69,4.1,83,13.7,71.3 +Blood Donor,32,m,42.4,86.3,20.3,20,35.2,5.46,4.45,81,15.9,69.9 +Blood Donor,32,m,44.3,52.3,21.7,22.4,17.2,4.15,3.57,78,24.1,75.4 +Blood Donor,33,m,46.4,68.2,10.3,20,5.7,7.36,4.3,79,18.7,68.6 +Blood Donor,33,m,36.3,78.6,23.6,22,7,8.56,5.38,78,19.4,68.7 +Blood Donor,33,m,39,51.7,15.9,24,6.8,6.46,3.38,65,7,70.4 +Blood Donor,33,m,38.7,39.8,22.5,23,4.1,4.63,4.97,63,15.2,71.9 +Blood Donor,33,m,41.8,65,33.1,38,6.6,8.83,4.43,71,24,72.7 +Blood Donor,33,m,40.9,73,17.2,22.9,10,6.98,5.22,90,14.7,72.4 +Blood Donor,33,m,45.2,88.3,32.4,31.2,10.1,9.78,5.51,102,48.5,76.5 +Blood 
Donor,33,m,36.6,57.1,38.9,40.3,24.9,9.62,5.5,112,27.6,69.3 +Blood Donor,33,m,42,63.1,32.6,34.9,11.2,7.01,4.05,105,19.1,68.1 +Blood Donor,33,m,44.3,49.8,32.1,21.6,13.1,7.44,5.59,103,30.2,74 +Blood Donor,33,m,46.7,88.3,23.4,23.9,7.8,9.42,4.62,78,29.5,74.3 +Blood Donor,34,m,42.7,65.3,46.7,30.3,23.4,10.95,5.06,75,99.6,69.1 +Blood Donor,34,m,43.4,46.1,97.8,46.2,11.3,7.99,3.62,71,35.3,69.6 +Blood Donor,34,m,40.5,32.4,29.6,27.1,5.8,10.5,4.56,91,26.6,72 +Blood Donor,34,m,44.8,77.7,36.9,31,19.5,10.51,5.59,80,23.7,78.9 +Blood Donor,34,m,42.6,27,21.4,21.7,7.2,8.15,6.79,85,13.9,67.7 +Blood Donor,34,m,29,41.6,29.1,16.1,4.8,6.82,4.03,62,14.5,53.2 +Blood Donor,34,m,44.6,84.1,19.6,29.8,5.8,7.6,5.07,95,9.9,71.9 +Blood Donor,34,m,46.8,61.7,24.5,24.2,23.1,10.99,4.6,83,23.8,73.1 +Blood Donor,34,m,41.8,75.8,30.9,35.5,6.1,9.97,5.94,89,48.5,71.3 +Blood Donor,34,m,46.1,70.6,35.8,30,7.6,7.7,4.2,93,14.3,78.7 +Blood Donor,34,m,43.6,58.9,47.1,31.1,18.5,9.14,4.99,95,22.2,69.3 +Blood Donor,35,m,37.5,69.8,37.1,25,7.8,11.66,5.73,84,27.3,71 +Blood Donor,35,m,42.1,68.3,37.2,56.2,11.1,9.3,4.63,99,16.8,73.6 +Blood Donor,35,m,44.7,79.3,53.5,30.8,9.7,11.39,7.04,88,77.3,77.1 +Blood Donor,35,m,41.5,115.1,24.1,30.4,5.7,9.41,4.33,81,22.2,71.3 +Blood Donor,35,m,48.7,72.7,24.1,31,45.1,9.4,3.8,90,20,75.8 +Blood Donor,35,m,47.3,92.2,30.7,25.7,6.6,11.58,5.9,82,36.9,77.8 +Blood Donor,35,m,44.5,70.3,26.2,25.1,5.1,10.12,4.69,82,20.7,67.2 +Blood Donor,35,m,47.4,54.5,18.6,21.6,10.3,8.1,6.23,66,28.1,74 +Blood Donor,35,m,51,82.7,29.3,26.8,8.7,12.32,5.44,89,25,77.3 +Blood Donor,35,m,27.8,99,30.7,27.8,9.4,6.8,4.27,65,40.5,80.7 +Blood Donor,36,m,46.1,58.5,26.8,25.3,6,6.61,5.07,71,10.5,79.6 +Blood Donor,36,m,45.5,57.6,22.5,19.5,7.5,5.28,4.06,88,62.5,71.6 +Blood Donor,36,m,41.7,77.2,103.6,46.9,10.4,12.21,5.63,88,20.9,69.3 +Blood Donor,36,m,45.9,58.8,29.7,27.7,11.7,5.6,4.89,93,23.1,70.8 +Blood Donor,36,m,48.7,65,11.5,18,7.4,8.02,7.35,69,14.2,73.4 +Blood Donor,36,m,53,66.4,40.8,23.2,7.5,8.73,5.81,75,36.1,77.3 +Blood 
Donor,36,m,47.8,89,48.5,38.4,8.6,8.26,5.62,96,21.9,76.2 +Blood Donor,36,m,42.6,65.3,35.8,27.1,15.7,10.66,4.38,96,34.7,71 +Blood Donor,36,m,42.4,47.3,23,25.5,6.1,9.46,5.29,79,17.5,73.8 +Blood Donor,36,m,48.9,82.8,16.9,24.4,8.9,8.91,5.1,97,14.8,79.9 +Blood Donor,37,m,31.4,106,16.6,17,2.4,5.95,5.3,68,22.9,72.3 +Blood Donor,37,m,42.9,70.7,16.3,24.1,15.7,9.03,6.8,93,70.1,73.4 +Blood Donor,37,m,44,57.4,26.1,24.6,9.7,10.41,6.17,83,38.9,76.5 +Blood Donor,37,m,41.5,64.6,23.7,29.9,9.3,5.49,3.97,100,10.4,69.3 +Blood Donor,37,m,47.9,68.8,40.3,46.9,6,9.76,6.42,81,22.7,80.6 +Blood Donor,37,m,44.8,94.3,32.2,36.7,6.3,9.76,4.12,113,23.8,72.5 +Blood Donor,37,m,38.6,61.2,24.6,31.9,7.9,6.02,4.63,72,10.3,56.3 +Blood Donor,37,m,51.2,84.5,18.8,24.7,9.9,8.62,6.59,94,25.3,76.3 +Blood Donor,37,m,41.9,77.5,24.9,25.8,4.1,8.7,4.36,84,16,71.5 +Blood Donor,37,m,46.1,44.3,42.7,26.5,6.4,10.86,5.05,74,22.2,73.1 +Blood Donor,37,m,50.4,48.5,19.4,27.5,11.6,5.78,4.93,90,27.8,75 +Blood Donor,37,m,33.9,64,91.7,44.7,9.1,8.35,5.4,95,30.3,74.7 +Blood Donor,37,m,42.9,61.8,96.1,44.1,9.6,7.82,5.1,82,32.3,69.3 +Blood Donor,37,m,40.8,118.9,17.2,19.2,3.2,9.17,4.26,88,13.5,72 +Blood Donor,37,m,43.6,72.8,51.4,43.7,13.8,8.16,4.88,70,94.5,75.2 +Blood Donor,37,m,46.4,53.3,20.2,24.9,8.7,8.63,5.9,86,23.3,78.9 +Blood Donor,37,m,48.7,62.3,21,21.1,41.9,9.71,4.02,84,16,75.1 +Blood Donor,38,m,48.1,63.2,11.7,14.7,5.1,8.83,3.87,85,9.5,73.1 +Blood Donor,38,m,39.9,62.9,71.7,43.9,10.4,10.9,7.01,99,88.3,73.1 +Blood Donor,38,m,45.5,50.2,16.3,22.8,10.9,8.73,5.88,103,13.8,76.1 +Blood Donor,38,m,43.4,58.9,45.7,45.7,9.2,10.86,5.74,90,30,73.9 +Blood Donor,38,m,41.2,43.8,19.9,20.5,10.1,5.95,3.91,63,70.8,67.4 +Blood Donor,38,m,44.7,69.4,47.4,35.1,16.7,6.9,4.14,67,17.3,70.1 +Blood Donor,38,m,42,42.7,34.8,42.2,3.3,6.1,4.74,96,14.6,66.7 +Blood Donor,38,m,48.4,44.9,23.4,22.1,7.9,10.53,7.51,87,43.2,82.6 +Blood Donor,38,m,38.3,81.5,65.9,35.3,14,11,5.46,80,45.2,68.1 +Blood Donor,38,m,40.5,61.7,18.6,24.7,6.7,8.47,6.05,89,19.6,75.6 +Blood 
Donor,39,m,45.1,63.9,26,21.3,9.3,8.57,5.24,79,29.4,71.2 +Blood Donor,39,m,38.8,52.5,54.3,31.3,10.1,10.68,6.26,81,31.5,77.2 +Blood Donor,39,m,47,66.5,24.8,29.1,10.5,10.04,6.26,73,49.8,78.4 +Blood Donor,39,m,36.6,71.3,28.9,25.9,6.1,5.7,4.66,77,22.6,66.3 +Blood Donor,39,m,43.9,90.1,87.9,60.6,8.6,9.94,4.64,98,99.3,66.2 +Blood Donor,39,m,43.4,48.6,33.4,35,23.4,9.68,5.55,73,22.1,75.2 +Blood Donor,39,m,36,36.5,21.5,25.7,3.3,8.43,4.85,93,23.1,70.5 +Blood Donor,39,m,45.7,73.4,45.5,33.8,5,9.07,4.41,82,20.6,78.9 +Blood Donor,39,m,37.5,80.2,32.6,25.3,2.7,9.3,6.85,72,27.5,73.1 +Blood Donor,39,m,46.4,102.9,44.4,26.2,4.1,9.29,8.89,103,64,72.2 +Blood Donor,39,m,45.8,62.5,20.7,22.8,45.5,8.61,4.78,77,15.9,75.5 +Blood Donor,39,m,46.1,77.1,34.9,37.6,4.1,13.86,8.11,94,76.9,71.1 +Blood Donor,40,m,47.8,68,17.5,22.3,7.7,9.94,6.09,88,23.3,73.9 +Blood Donor,40,m,44.5,45.7,25.9,27.6,9.2,8.93,4.49,87,17,71.2 +Blood Donor,40,m,41.4,67.5,59.8,36.8,7.3,4.18,6.02,76,92.7,72.5 +Blood Donor,40,m,45,74.2,20.9,29.1,12,9.1,6.29,92,24.1,74.1 +Blood Donor,40,m,39.1,66.5,33.3,32.9,14.8,7.87,4.91,88,18.5,68.8 +Blood Donor,40,m,45.1,63.4,39.6,31.4,19.7,11.31,4.74,91,18.2,81.1 +Blood Donor,40,m,45.4,52.9,41.3,26.2,10.6,10.27,6.85,82,40.2,76.6 +Blood Donor,40,m,46.9,61.8,27.4,43,7,7.26,5.09,98,14.2,75.3 +Blood Donor,41,m,45.1,86.8,24,23.9,10.6,7.84,6.23,72,35.3,75.1 +Blood Donor,41,m,38.5,63.9,34.3,43.4,9.6,6.8,4.13,90,11.4,64.1 +Blood Donor,41,m,42.3,55.9,19.6,18.9,10.9,7.15,3.29,86,24.5,76.1 +Blood Donor,41,m,46.9,69,19.7,27.3,6.1,6.39,4.55,72,21.6,71.3 +Blood Donor,41,m,40.2,48.2,37,34.9,9.2,11.35,5.56,78,26.6,73.3 +Blood Donor,41,m,53.3,79,29.5,28.5,14.1,7.96,5.9,87,33.8,83.4 +Blood Donor,41,m,44.7,74.9,25.2,20.2,6.3,10.34,4.23,74,23.7,72.1 +Blood Donor,41,m,37.4,75.1,28,25.7,4.1,10.62,5.57,83,18.6,71.9 +Blood Donor,42,m,45.3,55.3,31,50,18.5,15.43,5.88,83,15.4,72.4 +Blood Donor,42,m,44.1,46.8,23.8,19.4,7,10.83,6.28,95,19.7,73 +Blood Donor,42,m,37.8,78.6,51.4,31.8,10.1,9.66,6.15,85,15.1,70.8 +Blood 
Donor,42,m,46.3,71.4,30.3,22.5,26.9,7.95,7.3,88,21.8,71.8 +Blood Donor,42,m,46.4,43.7,18.7,24.3,6.3,6.59,4.78,93,10.1,73.1 +Blood Donor,42,m,38.1,88.7,46.7,37,12.5,7.26,6.76,54,18.4,72.1 +Blood Donor,42,m,46.9,68.7,118.1,49.3,20.2,7.93,5.62,86,74.9,73.8 +Blood Donor,42,m,37.8,83.7,25.3,20,18.6,7.52,5.07,108,17.4,64.1 +Blood Donor,42,m,46.2,57.7,35.6,22.9,14.8,11.01,8.6,80,70.1,77.5 +Blood Donor,43,m,37,82,25.3,26.2,5.4,8.42,4.96,79,15.9,68.4 +Blood Donor,43,m,39.7,126,21.9,28.9,16.2,8.48,5.3,114,21.4,80.3 +Blood Donor,43,m,54.4,99.3,40.2,29.5,6.1,8.84,5.92,95,22.3,79.3 +Blood Donor,43,m,36.3,67.3,28.7,27.9,5.5,8.55,5.27,97,17.1,74.2 +Blood Donor,43,m,48.6,45,10.5,40.5,5.3,7.09,NA,63,25.1,70 +Blood Donor,43,m,42.9,50.7,26.4,22,6.4,10.2,4.72,81,15.2,71.8 +Blood Donor,43,m,39.4,61.8,55,32.5,7.1,11.94,7.84,64,153.5,75.6 +Blood Donor,43,m,41.5,75.6,15.3,21.2,5.2,7.36,4.71,84,12.9,72.2 +Blood Donor,43,m,42.8,76.9,57.7,33,11.3,9.63,9.43,72,23.3,72.9 +Blood Donor,43,m,43.9,40.7,35.8,188.7,8.7,7.7,5.41,90,21.6,75.4 +Blood Donor,43,m,41.2,59.4,34.3,24.2,4.1,9.64,4.93,84,20.8,74.5 +Blood Donor,44,m,44.4,84.9,28,26.1,33.9,10.28,4.68,84,14.1,76.9 +Blood Donor,44,m,38.4,110.8,17.2,30.7,4.6,9.51,5.12,74,25.2,73.6 +Blood Donor,44,m,44.5,93.6,27.5,25.5,5.4,7.79,4.59,108,16.7,76.8 +Blood Donor,44,m,37.4,105.7,16.2,21.3,5.2,8.69,6.02,67,14,70.9 +Blood Donor,44,m,35.5,81.7,27.5,29.5,6.4,8.81,6.65,83,24.1,68 +Blood Donor,44,m,44,84.3,47.4,31.3,4.1,10.28,5.42,105,34.2,75.3 +Blood Donor,44,m,48.6,73.5,49.6,32.5,22.6,11.24,6.68,95,32.2,74 +Blood Donor,44,m,40.8,71.6,17.2,21,3.5,7.38,4.46,88,11.4,63.4 +Blood Donor,44,m,47.3,43.9,16.2,19.3,4.7,10.02,5.85,100,14.9,71.2 +Blood Donor,44,m,44.6,53.9,28.4,23.8,8.8,9.47,4.87,94,21.3,69.2 +Blood Donor,45,m,41.4,63.2,41.4,31.6,5.8,12.28,7.29,82,56,70.6 +Blood Donor,45,m,46.5,77.8,52.9,34,12.1,9.82,5.58,101,34.9,76.2 +Blood Donor,45,m,36.2,102.6,70.8,40.3,14.5,8.69,5.98,94,80.6,66.6 +Blood Donor,45,m,41.7,73.2,43.6,29.4,6.4,8.89,5.31,71,67.4,70.3 
+Blood Donor,45,m,43.2,68.2,27.8,42.3,6.6,10.93,6.61,105,27.2,74.5 +Blood Donor,45,m,40.3,64.4,18.8,27.2,5.2,6.71,4.25,74,19.7,67.2 +Blood Donor,45,m,46.4,77,60.3,32.9,8.8,9.38,6.27,85,81.1,75.5 +Blood Donor,45,m,44.3,71.5,15.9,16.3,7.3,8,5.35,83,19.8,71.3 +Blood Donor,45,m,45.7,75.4,41.4,28.6,7.7,10.88,6.25,99,85.4,78.8 +Blood Donor,46,m,42.6,61.6,18.6,21.4,10.2,7,6.13,89,19.9,73.7 +Blood Donor,46,m,35.7,49.2,22.7,23,17.4,8.51,5.85,87,13.4,62.1 +Blood Donor,46,m,39.7,40.3,14.6,22.3,5.3,6.3,4.66,71,11.7,67.2 +Blood Donor,46,m,37.9,79.9,17.7,20.1,12.7,10.1,5.38,72,12.7,67.7 +Blood Donor,46,m,41.8,65.6,30.5,28.2,6.1,10.68,4.72,82,24.8,65 +Blood Donor,46,m,47.7,51.6,19.7,21.8,6.3,7.25,5.19,84,20.9,75.2 +Blood Donor,46,m,44.8,98,41,30.6,4,10.92,7.42,91,28.8,79.8 +Blood Donor,46,m,49.1,60,19.5,20.5,3.1,7.81,5.02,102,20.8,70.2 +Blood Donor,46,m,43,41,15.1,20.7,5.1,7.87,5.35,84,15.4,73.2 +Blood Donor,46,m,45.9,80.1,67.5,36.7,2.9,8.73,4.68,113,50.1,68.8 +Blood Donor,46,m,49.7,48.2,11.5,17.6,11.2,6.74,3.71,106,10.6,75.2 +Blood Donor,46,m,37.3,91.6,33.4,35.9,2.6,9.74,6.73,78,64.2,69.9 +Blood Donor,46,m,47.4,55.9,35.2,33.5,10.2,10.61,4.95,96,41.7,76.3 +Blood Donor,46,m,51.8,82.6,37.3,29.1,13.4,9.97,7.4,90,30.1,80.3 +Blood Donor,47,m,44.7,59.3,27.8,30.5,6.9,7.37,5.1,101,21.1,77.2 +Blood Donor,47,m,41.6,61,31.5,25.6,7.3,8.59,5.66,69,39.1,72.9 +Blood Donor,47,m,49.5,75.2,29.6,29,7.3,10.67,5.96,75,34.9,72.4 +Blood Donor,47,m,40.8,42.8,39,31.7,23.5,6.44,4.86,96,27.8,66.4 +Blood Donor,47,m,48,66.5,17.5,23.2,9.9,7.09,5.06,81,14.9,68.1 +Blood Donor,47,m,42.1,81.6,26.4,26.4,5.5,10.5,5.95,97,35.3,77.5 +Blood Donor,47,m,42.2,52.9,20.7,27.1,13.9,10.15,5.15,79,28.8,69.5 +Blood Donor,47,m,48.3,59.3,49.9,31.7,7.1,11.09,5.56,88,91.5,73 +Blood Donor,47,m,46.2,63,7,17.4,6.5,7.06,5.23,60,26.6,75.4 +Blood Donor,47,m,45.3,51.6,18.4,16.9,7.1,8.96,4.29,74,15.7,69.1 +Blood Donor,47,m,36.7,44.8,29.3,23.5,6.9,7.38,4.44,87,24.4,68.3 +Blood Donor,47,m,48,56.4,23,25.7,6.5,8.32,6.3,89,23.3,78 +Blood 
Donor,47,m,55.4,61.2,25.4,32.9,28.3,8.39,5.25,96,10.3,82.1 +Blood Donor,48,m,42.5,69.2,44.6,28.3,15.8,11.92,6.76,86,42.9,81.5 +Blood Donor,48,m,46.2,59.9,14.6,25.7,5.3,6.93,6.29,66,23.4,75.8 +Blood Donor,48,m,38.2,94.9,32,29.4,21.5,7.93,5.36,98,53,71.8 +Blood Donor,48,m,43.1,83.9,20.8,27.4,18.3,9.82,6.14,90,16.4,74.9 +Blood Donor,48,m,44.7,56.4,20.9,24.2,17.5,5.49,5.65,74,28.5,75 +Blood Donor,48,m,38.8,94.7,28.6,33.4,5.1,7.93,4.27,87,14.7,66.8 +Blood Donor,48,m,43.4,41.2,18.4,21.2,9.8,7.19,6.37,80,15.8,68.9 +Blood Donor,48,m,46,58.1,21.4,29.3,6.9,9.36,5.7,83,32.3,74.1 +Blood Donor,48,m,45.7,88.5,59.1,34,6.5,10.81,6.25,87,38.3,74.1 +Blood Donor,48,m,42.6,69.1,17.6,28.3,7.7,15.4,5.07,81,29.9,77.8 +Blood Donor,48,m,46.4,64.1,29.3,27.6,13.2,10.07,8.28,98,28.9,83.3 +Blood Donor,48,m,50.4,34.6,37.2,28,17.3,10.87,6.97,114,45.2,72.9 +Blood Donor,49,m,39.1,62.1,23.8,19.6,3.5,9.19,4.82,85,19.4,69.8 +Blood Donor,49,m,39.7,77.3,20.2,19,8.8,7.26,4.98,84,74.5,65.4 +Blood Donor,49,m,41.2,96,25,27.7,15.6,7.12,5.61,92,37.8,68.7 +Blood Donor,49,m,44.7,60.2,34.6,28.5,7.1,10.14,5.68,98,87,69.8 +Blood Donor,49,m,44.4,63.3,13.5,16.4,7.3,11.19,4.49,73,13.6,72.3 +Blood Donor,49,m,40.2,73.8,17.9,23.6,7.6,7.99,5.31,106,15.9,71.4 +Blood Donor,49,m,44.3,84.1,29,29,16.2,8.18,4.65,87,21.9,70.8 +Blood Donor,49,m,47.8,67.9,44.2,29.9,6.9,8.76,6.23,99,26.9,79.5 +Blood Donor,50,m,40,87.8,87.5,52.6,4.8,8.7,6.46,76,152.5,71 +Blood Donor,50,m,46.6,66.3,19.5,23.7,18.5,8.27,5.73,92,12.1,76.7 +Blood Donor,50,m,44.3,82.5,38.6,32.9,7.3,8.57,5.95,83,23.4,66.6 +Blood Donor,50,m,45.7,62.6,55.9,24.3,12.8,10.79,6.42,89,55.2,74.2 +Blood Donor,50,m,43.5,76.2,13.1,18.8,5.6,8.11,4.55,57,16.5,67 +Blood Donor,50,m,36,51,26.1,29,6.5,7.5,5.75,67,64,70.5 +Blood Donor,50,m,35.5,59.4,43.9,35.8,7.6,11.23,6.56,106,24.9,73.2 +Blood Donor,50,m,40.3,70.5,46.4,32.3,6.8,8.11,4.06,77,16.9,69.2 +Blood Donor,50,m,43.8,56.9,29.5,32.8,4.1,7.77,5.8,72,53.6,72.5 +Blood Donor,50,m,43.1,73.7,19,21.8,2.9,9.45,6.06,91,33.3,68.9 +Blood 
Donor,50,m,39.7,96.5,54.8,30.8,3.3,8.98,5.66,82,52.4,65.8 +Blood Donor,50,m,42.2,145,27.5,37.9,4.5,13.71,8.8,103,239,73.1 +Blood Donor,51,m,38,54.8,23.8,28.6,13.5,6.33,4.65,99,18.3,66.4 +Blood Donor,51,m,36.8,98.5,25.5,27.7,5,9.49,6.08,89,18.2,64.9 +Blood Donor,51,m,47.2,78.1,27.4,23.9,8.4,10.14,6.54,86,35.3,71.6 +Blood Donor,51,m,42.6,69.1,17.6,28.3,7.7,15.4,5.07,81,29.9,77.8 +Blood Donor,51,m,42,84.3,14.7,19.2,3.2,8.19,4.68,81,20.9,77.1 +Blood Donor,51,m,45.9,66.7,31.8,28.1,9,10.08,5.61,85,36.2,73 +Blood Donor,51,m,50.8,56,33.2,32.9,6.3,11.33,5.93,93,22.4,80.3 +Blood Donor,51,m,46.3,69.4,20.1,26.8,4.5,9.99,4.14,81,12.3,73.9 +Blood Donor,52,m,42.3,72.6,47.4,23.8,6,8.62,7.2,86,36.1,77.6 +Blood Donor,52,m,42.2,72.2,47.9,23.7,8.6,11.91,6.29,96,62.5,72.9 +Blood Donor,52,m,82.2,82.2,37,23.7,7.8,8.9,6.09,77,87.8,67.4 +Blood Donor,52,m,39.3,63.6,20.7,25.3,4.4,8.26,5.35,84,18.9,64.4 +Blood Donor,52,m,46.8,85.5,15.2,31.9,10.3,9.09,6.56,89,21.8,73.3 +Blood Donor,52,m,44.7,43.5,40.9,47.2,10.4,10.08,5,74,26.5,75.4 +Blood Donor,52,m,37.2,73.6,28.9,29.8,6.6,6.8,4.94,8,24.1,64.2 +Blood Donor,52,m,40.7,78.6,43.3,27.1,13.4,10.95,5.22,73,30,80.9 +Blood Donor,52,m,41.2,67.4,20.4,18.8,4.3,8.51,4.97,82,40.4,68.5 +Blood Donor,52,m,46.6,72.2,35,41.1,4.6,7.7,6.92,80,21.5,79.6 +Blood Donor,52,m,41,61.4,28.7,34.8,5.4,9.36,6.66,100,26.8,73.6 +Blood Donor,52,m,41.6,59.1,26.3,25.6,3,9.55,4.5,94,33,71.1 +Blood Donor,53,m,45.3,53,39.6,31.3,9.6,9.96,7.22,73,42.9,78.6 +Blood Donor,53,m,38.7,104,66.9,34.3,7.8,8.07,4.6,106,73.7,73.2 +Blood Donor,53,m,44.5,61.2,14.4,18.1,8,6.95,5,70,18.3,72 +Blood Donor,53,m,49,86,28.9,23.9,11.8,6.09,4,74,33.6,75.8 +Blood Donor,53,m,37.8,98.1,30.5,21.1,4,5.02,4.42,94,23.2,65.2 +Blood Donor,53,m,44.8,119.7,29.2,20.7,6,13.8,8.78,64,49.3,75.4 +Blood Donor,53,m,40.5,76.1,27.8,22,6.3,11.14,6.96,90,53.1,71.3 +Blood Donor,53,m,41.7,45.3,23.2,25.1,10.8,5.68,5.78,119,114.9,67.9 +Blood Donor,53,m,38.1,82.5,8,17.5,2.4,9.13,6.28,103,35.8,69.9 +Blood 
Donor,53,m,49.2,71.8,42.8,29.4,6.8,15.1,6.24,107,48.3,77.8 +Blood Donor,54,m,46.9,74.7,36.2,52.6,6.1,8.84,5.2,86,33.2,79.3 +Blood Donor,54,m,46,70.2,18.6,24.7,24.1,7.83,6.24,76,24.3,76.8 +Blood Donor,54,m,43,67,36.1,26.1,5,10.2,5.98,105,45.4,75.9 +Blood Donor,54,m,46.4,75.1,54.1,39.6,10.6,6.59,7.43,85,73.2,75.2 +Blood Donor,54,m,40.8,72,25.9,29.2,8.5,7.1,5.79,89,15.6,67.9 +Blood Donor,54,m,41.6,100.4,51.8,35.7,4.3,10.37,7.29,92,82.6,72.4 +Blood Donor,55,m,44.1,60,26.3,25.9,5.1,7.23,7.3,88,41.6,77.7 +Blood Donor,55,m,39,45.8,23.5,21.2,2.4,8.41,7.73,73,36.3,73 +Blood Donor,55,m,42.9,92.6,21.6,26.1,7.4,12.86,5.73,94,42.9,70.1 +Blood Donor,55,m,40,44.9,10.2,14.1,2.6,5.98,4.55,71,8.8,66.3 +Blood Donor,55,m,46.2,87.1,36.9,21,4.5,7.55,6.33,80,30.2,72.2 +Blood Donor,55,m,47.6,71.9,25.8,24.5,5.8,9.24,4.63,83,29.1,76.7 +Blood Donor,55,m,28.1,65.5,16.6,17.5,2.8,5.58,4.39,65,26.2,62.4 +Blood Donor,55,m,44.7,71.6,22.9,22.1,5.5,6.82,4.61,105,59.2,72.7 +Blood Donor,55,m,41.5,59.5,15.4,16.2,6.8,6.35,5.22,80,12.4,69.9 +Blood Donor,56,m,39.9,62.9,27.2,22.8,7.7,5.74,6.75,86,30.7,72.5 +Blood Donor,56,m,39.4,97.9,28.2,24.7,5.9,10.4,6.61,82,17.8,74.1 +Blood Donor,56,m,42.9,50,20.4,22.1,5.4,7.28,5.37,90,34.6,76 +Blood Donor,56,m,37.9,49.2,16.6,15.7,3.7,9.9,5.3,95,20.8,67.1 +Blood Donor,56,m,42.1,45.6,30,26.7,6.1,8.9,6.4,71,37.6,70.1 +Blood Donor,56,m,40.2,37.1,30.1,25.1,10.2,9.69,4.93,103,20.7,71.9 +Blood Donor,56,m,49.5,56.3,22.1,24.9,11.5,8.56,4.69,91,14.7,69.6 +Blood Donor,56,m,45.7,45.3,19.5,27,9.3,10.62,6.04,94,17.4,74.5 +Blood Donor,57,m,59.7,64.5,17.3,21.2,18.9,12.07,3.97,106,15.1,77.6 +Blood Donor,57,m,43.5,56.2,60.4,37.3,7.3,6.79,5.99,110,185.2,71.8 +Blood Donor,57,m,43.3,86.8,21.2,22.2,6.8,7.87,4.91,65,19.2,71.3 +Blood Donor,57,m,46.4,80.1,20.2,23.9,19.2,8.02,5.31,74,17.1,77.5 +Blood Donor,58,m,44,56,30.6,33,4.8,12.37,6.33,74,58.7,75.8 +Blood Donor,58,m,46.8,79.3,38,24.1,4.7,9.51,5.07,99,22.9,72.4 +Blood Donor,58,m,41.3,58.9,12.8,23.4,5.4,8.17,5.7,60,10.8,70.1 +Blood 
Donor,58,m,40.9,55.8,17.5,23,5.3,8.52,6.41,93,18.5,70.9 +Blood Donor,59,m,45.7,115.4,16.4,23.8,3.7,8.2,4.46,70,14.3,78.5 +Blood Donor,59,m,45.3,106.5,13.5,19.2,6.9,7.97,4.86,67,15.7,74.6 +Blood Donor,59,m,41.9,72.4,19.3,25.5,29.8,5.6,3.09,78,27.2,69 +Blood Donor,59,m,39.8,49.4,25.4,21.4,24.7,7.5,3.69,86,18.7,71.9 +Blood Donor,59,m,38.4,61.2,15.9,27.2,3,6.88,6.89,91,14,64.5 +Blood Donor,59,m,48.9,50.3,23.7,28.1,15.8,8.97,4.33,75,19,77.6 +Blood Donor,59,m,43.8,46.6,28.3,27.4,6.1,10.56,5.47,83,20.3,78.5 +Blood Donor,59,m,37.8,83.7,25.3,20,18.6,7.52,5.07,108,17.4,64.1 +Blood Donor,60,m,42.2,48.8,41,34.8,3.6,9.6,5.6,98,71.4,71.6 +Blood Donor,60,m,46.3,59.7,24.4,30.4,18,7.05,6.2,91,13.8,66.9 +Blood Donor,60,m,40.4,46.8,17.7,25.7,13.5,5.79,5.42,92,19.2,70 +Blood Donor,60,m,43.4,71.5,10.2,17.4,7.5,8.11,5.31,74,13.3,71.5 +Blood Donor,60,m,41.1,67.2,49,34.4,16.4,10.14,4.65,107,25.5,71.9 +Blood Donor,60,m,45.2,89.4,38.6,27.2,8.6,5.19,6.01,75,76.9,74.4 +Blood Donor,61,m,45.9,73.3,17.1,24.3,4.8,10.01,4.95,88,23.5,70.8 +Blood Donor,61,m,43.4,70.1,19.9,24,3.6,6.89,4.58,64,24.9,74.2 +Blood Donor,61,m,34.3,60.5,35.4,26.6,15.2,9.03,7.62,74,51.4,76 +Blood Donor,61,m,44.8,104.9,15,18.8,6.5,9.38,5.42,69,26.2,79.6 +Blood Donor,61,m,38.9,59.5,22.8,30.9,6.3,9.45,5.23,95,20.3,71.4 +Blood Donor,62,m,42.1,51.4,11.8,21.8,2.7,5.7,5.25,80,12.9,72 +Blood Donor,62,m,46.6,98,36.7,29.4,7,7.56,5.52,70,23.1,86.5 +Blood Donor,62,m,39.6,42.7,31.3,30.9,13.5,7.17,3.81,89,16.3,64.8 +Blood Donor,62,m,44.7,76.5,43.7,27.5,6.9,9.94,5.74,83,59.3,77.8 +Blood Donor,63,m,39.5,59.6,26.2,25.4,12.4,6.78,6.18,78,22.6,72.7 +Blood Donor,63,m,45.3,71.3,16.6,24.1,5.7,8.92,4.69,81,19.8,73.7 +Blood Donor,63,m,40.8,74.3,25,27.5,5.5,7.74,6.35,107,50.4,69.3 +Blood Donor,64,m,46,75.9,34.8,30.6,10.8,9.43,6.69,81,53.7,70.7 +Blood Donor,64,m,40.1,66.7,18.3,22.5,5.7,9.65,6.37,73,19.1,66 +Blood Donor,64,m,35.1,72.5,27.2,30.5,20.8,7.99,6.47,91,36.2,68.7 +Blood Donor,64,m,44.5,87.8,15.1,23.2,12.3,9.49,7.7,78,20,74.3 +Blood 
Donor,65,m,41,81.8,27.9,23.6,9.1,7.46,5.84,78,21.2,68.2 +Blood Donor,65,m,43.1,64.8,43.1,35.6,9.5,9.6,4.03,78,21.9,71.5 +Blood Donor,65,m,34.8,66.2,51.9,37.5,6.6,8.39,5.91,72,112.8,77 +Blood Donor,65,m,44.7,99.4,31.9,30.5,12.2,7.15,6.31,82,38.5,75.7 +Blood Donor,65,m,39.1,45.8,23.1,27.5,6.4,7,6.23,73,27.1,64.3 +Blood Donor,65,m,43.6,104,32.3,34.2,7.7,8.23,4.69,89,20.8,75.5 +Blood Donor,66,m,48.4,76,31.9,29.6,13.8,8.81,4.17,111,26.2,76.7 +Blood Donor,66,m,40.6,79.6,27,28,10.1,10.88,5.48,76,29.8,71.8 +Blood Donor,66,m,32.6,58.3,20.3,27.3,9.4,9.4,6.8,88,24.6,73.1 +Blood Donor,66,m,41,61.4,28.7,34.8,5.4,9.36,6.66,100,26.8,73.6 +Blood Donor,67,m,44.8,72.8,39.4,28.4,23.3,7.84,7.02,97,78.3,67.5 +Blood Donor,67,m,44.4,86.5,25.2,27.3,9.1,8.85,6.43,88,77.2,74.8 +Blood Donor,67,m,38.7,83.8,19.4,25.8,8.7,7.95,5.53,100,16.6,70.4 +Blood Donor,68,m,42.9,71.1,15,34.8,9.3,8.39,6.64,89,14.5,75.5 +Blood Donor,68,m,39.3,76.7,19.7,24.6,6.3,10.51,4.15,74,28.1,74.2 +Blood Donor,70,m,27.8,85.7,25.4,38.9,4.2,6.06,3.96,63,46,56.9 +Blood Donor,70,m,41,63.5,16.9,21.6,5.9,6.03,4.74,83,13.5,73.7 +Blood Donor,71,m,39,87.9,26.1,32.1,12.2,10.3,6.31,90,99.7,69.8 +Blood Donor,76,m,29.2,48.9,25.2,27.2,8.3,4.52,2.79,127,18.3,58.1 +Blood Donor,76,m,38,97.1,28.6,33.1,14.1,4.83,5.56,90,117.6,73.8 +Blood Donor,77,m,52.2,52.2,12,23.5,10.9,5.51,4.41,103,25.8,67.2 +Blood Donor,32,f,39.9,35.2,22,29.8,6.3,8.16,4.37,60,4.5,72.5 +Blood Donor,32,f,47.4,52.5,19.1,17.1,4.6,10.19,NA,63,23,72.2 +Blood Donor,32,f,41.1,42.8,10.1,14.1,23.2,6.08,3.75,53,9.3,68.9 +Blood Donor,32,f,43.5,66.2,9.2,17.8,5.7,7.14,4.38,71,44.6,76.1 +Blood Donor,33,f,36,77.5,14.8,22,4.4,8.61,5.26,66,13.1,66.1 +Blood Donor,33,f,36.9,51.7,17.4,22,8.3,7,5.02,52,19.1,72 +Blood Donor,33,f,44.3,74,49.7,52.3,8.5,6.49,3.34,73,44.7,73.8 +Blood Donor,33,f,38.1,35.2,11.9,18.3,3,6.09,5.22,76,15.4,72 +Blood Donor,33,f,41,61.1,27,28,6,8.36,4.93,70,24.7,70.5 +Blood Donor,33,f,38.2,54.4,17.3,21.2,7.1,8.67,5.69,68,32.1,66.9 +Blood 
Donor,33,f,47.6,95.5,18.8,22.2,2.4,7.84,5.57,71,16.9,75 +Blood Donor,33,f,42.4,137.2,14.2,13.1,3.4,8.23,NA,48,25.7,74.4 +Blood Donor,33,f,45.9,72,37.8,33.5,17.7,7.32,4.25,81,21.5,78.3 +Blood Donor,33,f,35.4,53.5,9.8,17.6,3.8,6,4.48,78,8,71.5 +Blood Donor,33,f,38.5,82.2,11.9,17,7.3,7.23,3.92,50,7,73.3 +Blood Donor,33,f,41.2,73.1,14.3,20.8,11.1,7.4,3.22,56,11.4,69.9 +Blood Donor,33,f,40.6,73.7,12.6,16.3,3.1,7.75,6.36,67,19.5,71.4 +Blood Donor,34,f,37.3,36.3,19.9,28.7,3.8,3.9,4.94,86,4.9,70.7 +Blood Donor,34,f,41.9,47.4,20.8,28.5,8,7.66,4.61,97,11.2,71.9 +Blood Donor,34,f,36.3,63.2,21.4,20.4,4.6,7.41,5.17,75,18.7,64.2 +Blood Donor,34,f,36.2,70.9,14.3,21.6,3.7,9.82,5.59,65,27.4,74.3 +Blood Donor,34,f,46.5,52.3,30.5,32.3,5.7,9.05,6.28,90,18.5,80.6 +Blood Donor,34,f,45.5,76.9,17.4,18.6,3.2,8.45,5.1,53,25.8,79.7 +Blood Donor,34,f,39.7,39.3,11.2,16.4,8.4,5.27,4.68,61,24.3,71.5 +Blood Donor,35,f,41,62.6,27.9,12,12.8,10.34,5.9,78,22.8,76.1 +Blood Donor,35,f,40.5,72.4,14.5,17.9,5.8,9.38,4.13,75,17.8,69.9 +Blood Donor,35,f,43.4,62.6,19.2,22,3,10.03,5.07,64,12.4,72.8 +Blood Donor,35,f,45.6,38.2,22,21.9,3,6.94,4.69,64,17.4,70.4 +Blood Donor,35,f,62.9,51.2,20.7,23,2.9,6.33,4.62,67,15.2,71.9 +Blood Donor,35,f,42,69,19.9,16.6,10.8,7.85,4.43,67,15.1,64.1 +Blood Donor,35,f,38,51.3,18.3,15.3,3.9,8.63,4.91,61,16.2,71.8 +Blood Donor,35,f,46.9,50.8,35.4,22.1,13.1,8.27,5.7,95,37.3,80.7 +Blood Donor,35,f,46.1,88.7,23.1,20,7.7,8.41,4.79,74,28.1,79.3 +Blood Donor,35,f,44.7,83.2,25.3,22.6,3.9,8.02,5.73,68,10.8,76.4 +Blood Donor,36,f,46.4,69.1,17.7,24.3,6.6,6.5,5.62,67,18.5,75.5 +Blood Donor,36,f,39.7,52,39.9,33.5,2.9,9,4.18,77,27,78.5 +Blood Donor,36,f,39.9,59,11.3,20.4,9.4,7.6,5.51,69,16,81 +Blood Donor,36,f,42.7,55.1,12.4,16,6.8,6.63,5.05,75,10.4,72.8 +Blood Donor,37,f,39.2,58.1,12,21,5.3,5.96,5.8,72,15.6,70.7 +Blood Donor,37,f,42.1,47.5,18.3,21.7,2.8,8.29,4.36,83,28.9,73.4 +Blood Donor,37,f,40.5,47.3,15.3,19.4,4.9,5.4,3.95,76,7.1,69 +Blood 
Donor,38,f,40,73.5,16.6,19.2,8.3,5.23,5.52,54,24,71 +Blood Donor,38,f,48.5,56.2,36,27.9,15.3,11.07,6.06,69,23.5,77.3 +Blood Donor,38,f,50.3,92.1,40.1,30.9,4.2,10.02,3.97,100,30.5,77.3 +Blood Donor,38,f,41.2,61.9,19.4,22.9,10.5,7.86,3.61,85,19.5,66.6 +Blood Donor,38,f,40.3,87.2,21.4,23.9,5.5,7.52,5.73,69,20.1,74 +Blood Donor,38,f,40,79.3,11.9,22,6.5,8.33,4.58,60,13.7,68.1 +Blood Donor,39,f,31.4,106,16.6,17,2.4,5.95,5.3,68,22.9,72.3 +Blood Donor,39,f,46.4,59.2,14.1,18.9,4.5,7.9,4.55,61,14.5,77.3 +Blood Donor,40,f,41.7,55.3,12.4,23.5,8.2,4.8,5.32,75,10.6,73.5 +Blood Donor,40,f,41.6,60.4,14.7,16.3,5.7,7.61,4.57,72,9.8,73.1 +Blood Donor,40,f,43.2,42.4,15.7,23.6,9.7,7.56,6.74,88,11.5,73.2 +Blood Donor,40,f,42.9,54.7,46.2,32.8,11.8,8.29,5.25,105,27.9,73.9 +Blood Donor,40,f,39.9,50.2,14.9,20.4,5.1,6.49,4.92,68,24.1,72.8 +Blood Donor,41,f,40.4,75.7,24.3,25.2,6,8.95,6.01,64,15.8,73 +Blood Donor,41,f,42.4,51.3,16.3,18.3,4,6.68,4.24,65,17.1,71.9 +Blood Donor,41,f,39.3,67.9,17.6,23.4,3.7,6.57,4.33,71,61,77.1 +Blood Donor,41,f,46.2,48.3,15.8,16.6,4.3,4.55,5.18,67,13,73.7 +Blood Donor,41,f,39.8,67.9,19.4,19.5,12.2,7.41,5.02,64,27.3,65.5 +Blood Donor,42,f,43.4,54,11.3,21.3,1.8,6.43,4.43,54,18.6,82.3 +Blood Donor,42,f,38.7,64.1,35.9,27.8,6,8.18,4.87,64,15.2,72.1 +Blood Donor,43,f,37.6,77.1,8.3,15.9,12.5,8.37,4.49,73,68.9,67.1 +Blood Donor,43,f,44.1,41.4,17.6,19.8,7.4,7.52,6.05,69,21.3,73.2 +Blood Donor,43,f,41.2,38.2,18.6,20.5,9.3,6.15,5.44,64,9.7,70.9 +Blood Donor,43,f,39,83.1,21.3,18.8,3.5,12.8,9.03,58,22.7,73.1 +Blood Donor,43,f,39,63,7.3,17.5,6.4,7.01,4.94,73,14.6,74.4 +Blood Donor,43,f,34.7,80.7,27,27.9,3.7,7.18,5.55,68,9.2,68.2 +Blood Donor,43,f,44.8,58.8,23,30.1,6.6,8.7,6.08,70,10.4,72.8 +Blood Donor,43,f,33.7,57.5,15.1,24.8,6.9,7.91,5.37,71,46,68.4 +Blood Donor,43,f,47.8,40.9,12.9,17.2,5.5,4.31,3.96,76,10.4,70.7 +Blood Donor,44,f,37.4,28.9,16.8,25.8,3.1,6.7,4.45,50,9.2,66.3 +Blood Donor,44,f,45.6,57.6,21,19.1,3.7,10.36,6.68,74,20.8,74.4 +Blood 
Donor,44,f,35.5,60,13.7,15,9.1,6.71,5.29,64,7.4,65.2 +Blood Donor,44,f,44,86.1,15.6,26.1,7.6,6.23,4.72,69,11.8,70.5 +Blood Donor,44,f,48.3,103.3,37.2,41.5,7,7.49,5.04,62,17.8,77.5 +Blood Donor,44,f,35.8,65.7,21.2,19.2,4.3,10.19,6.13,61,19.6,69.4 +Blood Donor,44,f,43,86.5,18.3,25,2.2,8.45,5.9,63,13.6,72.7 +Blood Donor,44,f,36.3,76.4,17.1,20.8,4.6,6.8,4.27,74,14.3,68 +Blood Donor,44,f,35.5,71,15.8,30,3.7,7.56,4.43,53,17.8,67.7 +Blood Donor,45,f,36.7,54.9,25.5,25.3,5.7,6.91,5.42,72,13.7,67.5 +Blood Donor,45,f,39.5,92.2,18.7,19.4,3.5,8.32,5.38,85,15.8,72.2 +Blood Donor,45,f,38.6,56,20.6,21.5,14.1,6.92,5.48,69,14.9,69.9 +Blood Donor,45,f,41.7,62.4,15.9,16.8,3.7,4.38,5.05,90,10,70.5 +Blood Donor,45,f,41.8,87.5,37.1,25.6,10.3,10.25,4.56,86,92.3,68.4 +Blood Donor,45,f,46.4,49.1,14.9,25.5,11,8.77,2.86,94,12,77.1 +Blood Donor,45,f,37.5,54.3,12.9,14.8,5.9,6.95,5.29,66,49.4,70.1 +Blood Donor,45,f,37,78.2,19.9,19.1,4.3,6.6,4.73,64,31.8,69.6 +Blood Donor,45,f,59.8,59.8,13.2,17.4,6.9,5.62,6.42,70,12.3,66.6 +Blood Donor,46,f,48.8,66.3,19.7,23.6,4.3,10.57,5.88,78,24.2,72.4 +Blood Donor,46,f,36.7,62.3,10.8,17.4,3.7,6.17,4.07,67,15.1,69 +Blood Donor,46,f,35.8,50.8,18.6,25.2,3.8,8.5,5.85,54,10.1,71.5 +Blood Donor,46,f,39.9,71.3,15.4,29.5,4.6,5.95,6.94,72,13.4,69.5 +Blood Donor,46,f,32.4,56.5,23.8,24.8,6.4,8.22,5.19,64,11,72 +Blood Donor,46,f,40.1,70.1,15.7,23.8,6,8.41,6.03,86,11,76.4 +Blood Donor,46,f,39.9,73.9,14,17.2,16.3,6.93,5.11,71,12.7,64.7 +Blood Donor,46,f,42.9,55.1,15.2,29.8,3.6,8.37,NA,61,29,71.9 +Blood Donor,46,f,41.1,47.5,21,17.7,7.1,7.55,4.42,62,11.9,69.8 +Blood Donor,46,f,42.3,61.9,20,21.8,9.7,8.8,7.09,67,35,75.2 +Blood Donor,46,f,37.9,59.5,33,25,3.7,6.06,5.3,92,43.9,70 +Blood Donor,46,f,36.8,113.2,31.2,24.8,3.8,9.6,4.83,78,19.9,72.7 +Blood Donor,46,f,51.3,84.1,40.6,43.6,9.2,7.1,5.62,62,74.9,77.1 +Blood Donor,47,f,35.6,37.5,17.6,18,4.7,4.32,5.59,75,13,66 +Blood Donor,47,f,38.7,43.4,12.1,19.4,3,5.35,4.6,56,15.7,67.8 +Blood 
Donor,47,f,36.4,42,11.1,18.3,7.2,4.97,5.47,74,7.9,67 +Blood Donor,47,f,34.6,54.1,10.2,15.3,8.4,6.5,5.1,76,10.6,67.7 +Blood Donor,47,f,40.3,65,13.5,15.2,6.4,7.16,4.55,70,16.5,66.2 +Blood Donor,48,f,45.6,107.2,24.4,39,13.8,9.77,NA,88,38,75.1 +Blood Donor,48,f,35.2,45.4,23.4,27.1,4.9,6.83,4.35,70,11.8,72.1 +Blood Donor,48,f,39.2,51,15.3,18.4,7.6,9.17,6.66,70,16.2,71.7 +Blood Donor,48,f,32,66.3,14.2,21.3,5.5,4.72,5.23,41,17.2,65.6 +Blood Donor,48,f,46.3,71.8,23,25.2,11.1,9.41,5.84,85,19.1,77.4 +Blood Donor,48,f,45.3,40.6,18.5,27.7,5.7,7.48,4.64,66,19.6,70.6 +Blood Donor,48,f,37.5,88.3,14.7,19.6,5.4,9.28,4.68,61,12.3,67.9 +Blood Donor,48,f,39.6,65.9,64.3,39.5,2.8,8.8,6.14,77,24.1,75.8 +Blood Donor,48,f,43.7,50.1,17.3,26.3,8.1,8.15,5.38,64,13.4,73.1 +Blood Donor,48,f,46.8,93.3,10,23.2,4.3,12.41,NA,52,23.9,72.4 +Blood Donor,48,f,42.5,62.2,12.1,20.1,23.1,4.01,5.58,67,13,74.2 +Blood Donor,48,f,44.4,52.5,16.4,23.4,4.5,9.06,6.78,74,10.3,73.1 +Blood Donor,48,f,44.4,64.5,17.2,21.1,17.3,4.93,4.05,67,12.1,71.2 +Blood Donor,48,f,38.8,43.9,12.8,13.3,8.6,5.63,5.31,66,21.4,63.2 +Blood Donor,49,f,38.8,120.2,25.2,21.5,12,8.29,7.11,52,18.6,70.7 +Blood Donor,49,f,40.9,58.4,20.1,26.5,2.6,9.77,5.99,53,12.7,73.9 +Blood Donor,49,f,39.1,89.4,15.4,24.1,4.1,10.03,8.36,74,12,68.1 +Blood Donor,49,f,39.3,59.4,18.3,15,4.8,8.03,4.58,83,12.5,74.3 +Blood Donor,49,f,40.5,31.3,16.2,19.4,11.2,4.95,5.1,75,14.9,73.2 +Blood Donor,49,f,34.9,37.9,15.3,19.4,7.1,5.3,5.88,83,7.9,62.5 +Blood Donor,49,f,45.4,45.9,14.3,15.9,5.8,9.05,6.81,69,14.5,78.2 +Blood Donor,49,f,43.3,71.5,28.4,26,6.2,7.68,5.91,77,19.1,76.9 +Blood Donor,50,f,36.9,50.6,15.5,21.7,2.1,9.97,7.01,68,17.6,68.4 +Blood Donor,50,f,47.6,77.2,24,17.5,4.8,9.27,6.41,77,21.3,73.5 +Blood Donor,50,f,42.5,74,10.9,23,4.6,9.42,6.33,76,9,68.8 +Blood Donor,50,f,39.9,80.5,24.2,22.8,5.2,9.25,7.41,84,19.4,71.2 +Blood Donor,50,f,41.2,45.6,25,25.1,4.1,7.16,6.54,64,15.4,68.2 +Blood Donor,50,f,34.6,63,24.7,31,12.8,6.55,5.95,70,25.4,70.2 +Blood 
Donor,51,f,46.3,70.5,75.2,69.2,6.1,7.68,4.73,69,52.2,78.3 +Blood Donor,51,f,37.8,80.7,32.9,27.3,6.2,7.29,5.26,68,27.1,70.6 +Blood Donor,51,f,38.3,52.9,12.4,16.5,3.8,7.22,5.43,55,12.7,70.2 +Blood Donor,51,f,39.6,63.5,17,25.9,7.3,7,6.97,72,10.4,71.9 +Blood Donor,51,f,38.3,67.7,17.4,19.9,5.4,7.5,4.94,72,7.9,69 +Blood Donor,51,f,43.1,52.2,18.5,24.1,8.1,7.52,6.76,60,17.4,71.9 +Blood Donor,51,f,41.4,136.9,33.2,20,5,10.27,6.24,77,106.7,72.2 +Blood Donor,51,f,41,81.2,35.1,33.3,5.2,8.9,6.56,70,21.7,72.4 +Blood Donor,51,f,46,71,18.1,23.8,59.1,5.99,3.25,82,11.4,73.3 +Blood Donor,51,f,39.6,43,16.3,19.6,7.5,5.57,4.96,83,62.1,72.9 +Blood Donor,51,f,47.4,117.3,62.1,30.4,3.8,10.43,6.59,86,69.3,71 +Blood Donor,51,f,43.7,61.3,18,23.3,4.3,9.57,6.04,70,18.6,75.2 +Blood Donor,52,f,45.5,78.2,25.2,27.7,4.7,8.65,6.04,65,28.3,70.5 +Blood Donor,52,f,43.8,52,15.5,23.9,6,7.93,5.41,69,11.9,72.4 +Blood Donor,52,f,36,47.2,19.6,22.5,5.9,7.85,5.69,85,30.4,69.2 +Blood Donor,52,f,41.7,58.3,22.9,26.7,11.4,9.17,4.33,75,14.5,77.1 +Blood Donor,52,f,40.2,89.7,26.8,19.1,6.9,9.1,7.04,75,41.6,67.4 +Blood Donor,52,f,51.5,81.8,26.3,20.6,7.8,6.74,5.9,88,16.3,82.2 +Blood Donor,52,f,36.7,87.6,34.3,30.8,17.7,10.12,6.98,72,24.2,66.3 +Blood Donor,52,f,41.3,77.4,16.6,22.2,5,7.57,7.8,66,10.8,70 +Blood Donor,52,f,38.2,70.3,19.5,17.5,2.7,10.02,6.17,65,35.5,71.1 +Blood Donor,53,f,42.4,55,20.9,42.4,7.7,6.6,4.26,67,14.2,70.9 +Blood Donor,53,f,39,76,25.9,20.7,2.8,11.11,6.38,66,50.1,70.8 +Blood Donor,53,f,40.1,84.6,23,22.1,7.1,8.4,5.16,70,82.6,74.6 +Blood Donor,53,f,43.7,84.3,18.8,18.6,8.5,10.22,6.65,56,16.5,76.9 +Blood Donor,53,f,43.5,61.7,16.9,20.3,7,7.19,6.97,74,12.3,69.2 +Blood Donor,53,f,41.1,91.7,13.8,19.6,3.4,7.87,5.48,72,77.3,77.3 +Blood Donor,53,f,38,84.7,23.5,19.8,10.8,7.3,4.82,62,11.4,68.5 +Blood Donor,53,f,47.4,66.9,24.3,32.6,14.9,10.51,6.03,81,25.2,75 +Blood Donor,53,f,51.3,84.1,40.6,43.6,9.2,7.1,5.62,62,74.9,77.1 +Blood Donor,54,f,39.9,30.7,17,19.3,6.3,6.99,4.95,68,13.3,70.7 +Blood 
Donor,54,f,26.2,72.9,28.5,28.8,5.5,7.49,4.91,58,27.6,57 +Blood Donor,54,f,44.5,53.4,13.4,17.5,5.4,7.74,5.55,66,15.4,71.9 +Blood Donor,54,f,39.9,61.2,23.3,24.5,4.5,9.22,5.47,69,58.2,65.8 +Blood Donor,54,f,43.3,76.5,19.9,22.4,7,8.04,6.77,67,17.7,73.3 +Blood Donor,55,f,41.9,86.1,19.3,22.3,7,8.21,5.05,71,32.5,75.2 +Blood Donor,55,f,36.2,101.3,19.2,21.9,4.9,6.5,5.86,66,12.3,70.3 +Blood Donor,55,f,35.2,69.6,23.6,26,3.8,4.97,4.43,69,29.4,60.5 +Blood Donor,55,f,39.9,83.6,18.4,27.4,8,8.43,7.67,73,13.6,73.3 +Blood Donor,56,f,39.7,66,14.2,20.8,3.5,7.48,5.88,66,7.2,67.2 +Blood Donor,56,f,39.5,86.9,22.5,22.2,4.7,9.4,6.02,66,13,72.6 +Blood Donor,56,f,34.7,90.3,22.7,21.6,3.5,8.07,5.45,67,9,69.4 +Blood Donor,56,f,36.6,102.3,13.5,14.9,8.4,6.94,5.5,65,16.2,71 +Blood Donor,56,f,33.2,54.3,15.5,22.8,8,5.61,3.87,55,19.1,63.1 +Blood Donor,56,f,45.1,79.1,39,30.5,5.2,6.47,5.1,64,145.3,66.7 +Blood Donor,56,f,39.9,83.8,19.1,23.6,4.3,7.61,6.06,72,16.4,67.1 +Blood Donor,57,f,48.4,94.4,2.5,39.6,2.3,8.84,NA,82,6.4,76.8 +Blood Donor,57,f,41.2,83.5,32.6,39.3,4,10.67,8.46,69,22.4,75.7 +Blood Donor,57,f,42.6,57.1,15,18.9,5.3,8.9,5.93,61,21.3,74.8 +Blood Donor,57,f,38.6,80.9,33.1,26.7,6.5,6.45,5.1,59,11.3,70.9 +Blood Donor,57,f,27.3,85.1,18.4,25.4,2.2,8.96,6.66,68,10.2,62.5 +Blood Donor,57,f,37.9,50.3,12.2,18.1,3.5,6.72,5.06,71,10,69.3 +Blood Donor,57,f,38.7,62.8,21.8,29.2,9.2,6.55,7.08,68,13,70.7 +Blood Donor,57,f,48,56.9,8.6,20.1,4.4,8.14,6.9,63,14.8,73.1 +Blood Donor,57,f,44.7,60.6,16.5,24.3,4.2,10.47,4.9,68,15.9,68.5 +Blood Donor,57,f,42.7,62.9,11.2,16.6,11.6,12.7,6.25,62,8.1,71.9 +Blood Donor,58,f,35.3,91.4,14.7,21,18,6.58,5.51,60,10.2,70.4 +Blood Donor,58,f,26.3,52.5,39.5,77.2,5.8,5.15,3.53,40,31.2,51 +Blood Donor,58,f,47,74.8,36.1,28.9,5.3,9.82,6.71,70,32,76.6 +Blood Donor,59,f,43,82.4,33.1,30,7.5,9.2,7.43,61,30.7,75.4 +Blood Donor,59,f,45.1,78,26,32.4,9,9.85,7.32,70,12.9,70.9 +Blood Donor,59,f,40,68.4,13.2,20.3,8.2,9.1,6.38,63,16.3,71.9 +Blood 
Donor,60,f,40.5,86,26,27.6,5.7,8.6,4.9,58,20.1,71.7 +Blood Donor,60,f,35.8,75.5,28.7,21.5,2,9.05,4.56,70,24,71.8 +Blood Donor,60,f,52.4,88.2,50.2,31.7,8.8,8.49,5,77,47,77 +Blood Donor,60,f,40.1,80.7,34.6,31.2,11.9,9.32,6.94,68,27.4,76.6 +Blood Donor,60,f,45.4,51.4,16.4,19,6.8,10.78,6.79,78,12.5,72.8 +Blood Donor,60,f,41.4,85.1,15.6,18.6,10,8.31,5.34,103,13,71.5 +Blood Donor,61,f,38.1,66.4,11.8,22.7,6.8,6.35,6.55,65,14.5,78.9 +Blood Donor,61,f,43.4,47.5,12.8,17.3,5.6,7.52,5.81,71,11.8,69.9 +Blood Donor,62,f,36.6,82.5,18.2,24.3,10.6,9.82,7.12,54,15.6,67.8 +Blood Donor,62,f,35.4,59.7,21.2,24.7,3.3,9,7.45,59,17.7,65.4 +Blood Donor,62,f,44,46.6,24.8,25.5,7.6,6.75,4.59,91,39.5,74.3 +Blood Donor,63,f,47.6,59.9,21.1,27.8,8.4,9.24,6.08,62,18.3,74.5 +Blood Donor,63,f,36.1,67.6,13.8,12.2,5.8,9.44,6.88,66,22.4,65 +Blood Donor,63,f,27.8,85.7,25.4,38.9,4.2,6.06,3.96,63,46,56.9 +Blood Donor,64,f,39.8,68.1,20.5,24.8,5.8,7.53,7.58,56,14.2,65.8 +Blood Donor,64,f,43,52.7,31.6,29.2,5.5,11.69,6,63,61.9,73.4 +Blood Donor,65,f,33,74.3,13.2,16.8,4.1,7.46,5.76,52,9.1,64.2 +Blood Donor,68,f,41.4,102.3,38.4,26.4,6.8,8.5,6.79,59,23.8,68.9 +Blood Donor,70,f,40,97.9,15.1,15.9,6.8,11.46,5.08,62,19.2,65.3 +Hepatitis,38,m,45,56.3,NA,33.1,7,9.58,6,77.9,18.9,63 +Hepatitis,19,m,41,NA,87,67,12,7.55,3.9,62,65,75 +Hepatitis,23,m,47,19.1,38.9,164.2,17,7.09,3.2,79.3,90.4,70.1 +Hepatitis,25,m,42,38.2,63.3,187.7,14,6,4.28,66.9,40.2,70.5 +Hepatitis,27,m,45,27.5,10.5,37.8,10,8.77,3.2,55.2,35.9,74.5 +Hepatitis,29,m,49,NA,53,39,15,8.79,3.6,79,37,90 +Hepatitis,30,m,45,NA,66,45,14,12.16,6.1,86,43,77 +Hepatitis,32,m,45,34.6,44.3,96.2,16,10.11,6.28,81.2,48.1,82.1 +Hepatitis,32,m,41,34.4,12.1,60.9,6,13.8,5.48,45.4,33.1,71.1 +Hepatitis,34,m,46,36.7,7.4,31.6,9,9.71,5.37,82.3,34.4,71.6 +Hepatitis,35,m,47,37.9,13.3,48.4,8,10.3,4.14,69.2,68.2,76 +Hepatitis,36,m,44,32.9,9.4,32,14,11.42,5.73,68.6,40.6,70.9 +Hepatitis,38,m,41,20.6,15.2,53.5,24,10.23,4.89,81.8,57.9,71.1 
+Hepatitis,41,m,42,39.6,26.5,77.6,42,9.67,9.67,57.7,143.4,75.8 +Hepatitis,44,m,49,27.3,40.2,31.1,13,8.91,4.07,81.5,27.6,72.8 +Hepatitis,46,m,48,59.5,11.6,39,7,16.41,4.65,66.4,158.2,72.7 +Hepatitis,50,m,42,41.6,10.2,38.1,17,9.54,7.04,75.3,92.1,72.3 +Hepatitis,51,m,43,37.2,21.4,132.8,5,10.12,5.23,76.2,76.4,76.7 +Hepatitis,56,m,37,114,27.8,324,67,5.75,3.09,97.7,392.2,77.3 +Hepatitis,58,m,43,99.1,12.2,63.2,13,5.95,6.15,147.3,491,65.6 +Hepatitis,33,f,43,29.6,3.8,16.7,6,6.88,5.72,58.8,11.5,78.2 +Hepatitis,41,f,37,31.2,8.2,38.3,7,7.08,5.3,60.8,24.7,82.4 +Hepatitis,50,f,40,32.7,9,46,10,7.51,4.67,56.6,22.3,70.1 +Hepatitis,61,f,50,34.4,27.4,114.4,22,9.48,4.62,61.9,169.8,86 +Fibrosis,29,m,41,43.1,2.4,83.5,6,11.49,5.42,55.2,130,66.5 +Fibrosis,40,m,39,43.1,23.8,114.7,11,9.64,4.2,70.9,127.3,81.3 +Fibrosis,46,m,45,26.9,23.1,125,17,6.97,4.01,60.5,72.2,73 +Fibrosis,48,m,49,45.2,19.3,69.1,30,7.76,4.22,76.7,28.4,72.3 +Fibrosis,49,m,39,NA,118,62,10,7.28,3.5,72,74,81 +Fibrosis,49,m,46,NA,114,75,16,10.43,5.2,72,59,82 +Fibrosis,50,m,42,NA,258,106,15,8.74,4.7,77,80,84 +Fibrosis,53,m,46,NA,34,43,14,8.77,4,112,203,76 +Fibrosis,54,m,41,41.8,41.5,187.9,21,8.59,5.85,91,104.9,79.1 +Fibrosis,57,m,47,29.7,10.2,55.9,12,6.6,4.64,70.9,69.6,80.9 +Fibrosis,59,m,44,34.5,8.9,74.5,6,9.45,4.45,65,95.3,69.7 +Fibrosis,64,m,38,35.7,7.1,41.3,13,7.1,4.52,70,53,66.8 +Fibrosis,71,m,37,NA,130,90,15,9.92,4.7,79,77,76 +Fibrosis,36,f,46,39.3,67.1,161.9,13,9.24,4.81,65.3,60,73.9 +Fibrosis,38,f,40,39.8,14.9,68.9,11,8.55,4.31,60.5,40.1,76.5 +Fibrosis,57,f,43,52.1,8.3,35.8,18,8.61,6.19,71.4,27.9,82 +Fibrosis,68,f,43,22.9,5,42.1,12,7.29,4.89,80.9,11.9,76.1 +Fibrosis,49,f,39,NA,46,39,9,10.21,3.1,89,53,79 +Fibrosis,51,f,37,NA,164,70,9,3.99,4.2,67,43,72 +Fibrosis,56,f,39,NA,42,34,10,7.75,5,80,84,78 +Fibrosis,75,f,36,NA,114,125,14,6.65,NA,57,177,72 +Cirrhosis,38,m,44,NA,94,60,12,4.37,3.2,61,99,77 +Cirrhosis,39,m,34,137.8,4.8,35.6,9,3.65,4.82,519,133.4,57.5 +Cirrhosis,41,m,31,85.3,4.8,60.2,200,1.8,5.34,106.4,151,71.8 
+Cirrhosis,42,m,36,69.6,14.9,263.1,40,3.61,3.93,49.6,61,68.6 +Cirrhosis,45,m,29,11.3,7.1,101.9,31,1.73,3.71,76.7,65.6,70 +Cirrhosis,46,m,20,NA,62,113,254,1.48,NA,114,138,NA +Cirrhosis,46,m,35,109.6,2.3,19.2,11,7.1,4.1,1079.1,105.6,69.1 +Cirrhosis,47,m,42,NA,159,102,11,6.29,5.5,58,201,79 +Cirrhosis,51,m,39,66,29.6,185,19,2,3.6,58.3,399.5,79.4 +Cirrhosis,51,m,33,29.6,4.5,66.6,91,4.02,4.08,75.9,28.5,62.3 +Cirrhosis,56,m,27,81.1,17,319.8,37,1.42,3.54,66.9,93.7,65.3 +Cirrhosis,56,m,23,105.6,5.1,123,43,1.8,2.4,62.7,35.9,62.8 +Cirrhosis,56,m,30,40.4,0.9,80.3,119,1.88,1.43,79.3,17.6,54.2 +Cirrhosis,58,m,31,143.1,7,181.8,58,3.29,3.92,66.4,273.7,78.1 +Cirrhosis,59,m,36,49.7,5.2,110.1,37,2.29,3.68,118.2,56.9,74.8 +Cirrhosis,59,m,27,73.8,4,65.2,209,2.47,3.61,71.7,28.5,60.6 +Cirrhosis,59,m,31,86.3,5.4,95.4,117,1.57,3.51,60.5,53.6,68.5 +Cirrhosis,61,m,39,102.9,27.3,143.2,15,5.38,4.88,72.3,400.3,73.4 +Cirrhosis,65,m,NA,NA,40,54,13,7.5,NA,70,107,79 +Cirrhosis,74,m,23,34.1,2.1,90.4,22,2.5,3.29,51,46.8,57.1 +Cirrhosis,42,f,33,79,3.7,55.7,200,1.72,5.16,89.1,146.3,69.9 +Cirrhosis,49,f,33,190.7,1.2,36.3,7,6.92,3.82,485.9,112,58.5 +Cirrhosis,52,f,39,37,1.3,30.4,21,6.33,3.78,158.2,142.5,82.7 +Cirrhosis,58,f,34,46.4,15,150,8,6.26,3.98,56,49.7,80.6 +Cirrhosis,59,f,39,51.3,19.6,285.8,40,5.77,4.51,136.1,101.1,70.5 +Cirrhosis,62,f,32,416.6,5.9,110.3,50,5.57,6.3,55.7,650.9,68.5 +Cirrhosis,64,f,24,102.8,2.9,44.4,20,1.54,3.02,63,35.9,71.3 +Cirrhosis,64,f,29,87.3,3.5,99,48,1.66,3.63,66.7,64.2,82 +Cirrhosis,46,f,33,NA,39,62,20,3.56,4.2,52,50,71 +Cirrhosis,59,f,36,NA,100,80,12,9.07,5.3,67,34,68 diff --git a/materials/worksheet_06/tests_worksheet_06.R b/materials/worksheet_06/tests_worksheet_06.R new file mode 100644 index 0000000..7510394 --- /dev/null +++ b/materials/worksheet_06/tests_worksheet_06.R @@ -0,0 +1,871 @@ +library(digest) +library(testthat) + + +## Question 1.1 +test_1.1 <- function() { + test_that('Did not assign answer to an object called "answer1.1"', { + 
expect_true(exists("answer1.1")) + }) + + test_that('Solution should be a single character ("A", "B", "C", or "D")', { + expect_match(answer1.1, "a|b|c|d", ignore.case = TRUE) + }) + + answer_hash <- digest(tolower(answer1.1)) + if (answer_hash == "127a2ec00989b9f7faf671ed470be7f8") { + print("Think about the reason you took a sample from the population?") + } else if (answer_hash == "6e7a8c1c098e8817e3df3fd1b21149d1") { + print("Think about a dichotomous random variable, and a confidence interval for proportion.") + } else if (answer_hash == "d110f00cfb1b248e835137025804a23b") { + print("Think about a dichotomous random variable, and a confidence interval for proportion.") + } + + test_that("Solution is incorrect", { + expect_equal(answer_hash, "ddf100612805359cd81fdc5ce3b9fbba") + }) + + print("Success!") +} + +## Question 1.2 +test_1.2 <- function() { + test_that('Did not assign answer to an object called "answer1.2"', { + expect_true(exists("answer1.2")) + }) + + test_that('Solution should be a string of size 11 containing only "A" and "B")', { + expect_match(answer1.2, "^[aA|bB]{11}$") + }) + + test_that("Solution is incorrect", { + expect_equal(digest(tolower(answer1.2)), "4d9d5967d6c5d04f8176e4b028de1e7f") + }) + + print("Success!") +} + +## Question 2.1 +test_2.1 <- function() { + test_that('Did not assign answer to an object called "boxplots"', { + expect_true(exists("boxplots")) + }) + properties <- c(boxplots$layers[[1]]$mapping, boxplots$mapping) + + test_that("Plot should have 'category' on the x-axis", { + expect_true("category" == rlang::get_expr(properties$x)) + }) + + test_that("Plot is not the correct type", { + expect_true("GeomBoxplot" %in% class(boxplots$layers[[1]]$geom)) + }) + + test_that("Plot should have a title", { + expect_true("title" %in% names(boxplots$labels)) + }) + + print("Success!") +} + +## Question 2.2 +test_2.2 <- function() { + test_that('Solution should be a single character ("a", "b", "c", or "d")', { + 
expect_match(toString(answer2.2["No-Fibrosis"]), "a|b|c|d", ignore.case = TRUE) + }) + + test_that('Solution should be a single character ("a", "b", "c", or "d")', { + expect_match(toString(answer2.2["Fibrosis"]), "a|b|c|d", ignore.case = TRUE) + }) + + test_that('Solution should be a single character ("a", "b", "c", or "d")', { + expect_match(toString(answer2.2["Cirrhosis"]), "a|b|c|d", ignore.case = TRUE) + }) + + answer_hash <- digest(tolower(toString(answer2.2["No-Fibrosis"]))) + test_that("Solution is incorrect", { + expect_equal(answer_hash, 'ddf100612805359cd81fdc5ce3b9fbba') + }) + + answer_hash <- digest(tolower(toString(answer2.2["Fibrosis"]))) + test_that("Solution is incorrect", { + expect_equal(answer_hash, '6e7a8c1c098e8817e3df3fd1b21149d1') + }) + + answer_hash <- digest(tolower(toString(answer2.2["Cirrhosis"]))) + test_that("Solution is incorrect", { + expect_equal(answer_hash, '127a2ec00989b9f7faf671ed470be7f8') + }) + + print("Success!") +} + +## Question 2.3 +test_2.3 <- function() { + test_that('Did not assign answer to an object called "answer2.3"', { + expect_true(exists("answer2.3")) + }) + + test_that('Solution should be a single character ("A", "B", "C", or "D")', { + expect_match(answer2.3, "a|b|c|d", ignore.case = TRUE) + }) + + answer_hash <- digest(tolower(answer2.3)) + if (answer_hash != "ddf100612805359cd81fdc5ce3b9fbba") { + print("We don't know yet if there is a reduction in albumin level or not. 
So, what would be the a 'no-change' hypothesis?") + } + + test_that("Solution is incorrect", { + expect_equal(answer_hash, "ddf100612805359cd81fdc5ce3b9fbba") + }) + + print("Success!") +} + +## Question 2.4 +test_2.4 <- function() { + test_that('Did not assign answer to an object called "answer2.4"', { + expect_true(exists("answer2.4")) + }) + + test_that('Solution should be a single character ("A", "B", "C", or "D")', { + expect_match(answer2.4, "a|b|c|d", ignore.case = TRUE) + }) + + answer_hash <- digest(tolower(answer2.4)) + if (answer_hash != "127a2ec00989b9f7faf671ed470be7f8") { + print("We are interested to see if there is a **reduction** in the albumin level.") + } + + test_that("Solution is incorrect", { + expect_equal(answer_hash, "127a2ec00989b9f7faf671ed470be7f8") + }) + + print("Success!") +} + +# + +## Question 2.5 + +test_2.5_A <- function() { + test_that('Did not assign answer to an object called "answer2.5_A"', { + expect_true(exists("answer2.5_A")) + }) + + test_that("Solution should be a data frame", { + expect_true("data.frame" %in% class(answer2.5_A)) + }) + + expected_colnames <- c("mean_albumin") + given_colnames <- colnames(answer2.5_A) + test_that("Data frame does not have the correct columns", { + expect_equal(length(setdiff( + union(expected_colnames, given_colnames), + intersect(expected_colnames, given_colnames) + )), 0) + }) + + test_that("Data frame does not contain the correct number of rows", { + expect_equal(digest(as.integer(nrow(answer2.5_A))), "4b5630ee914e848e8d07221556b0a2fb") + }) + + test_that("Data frame does not contain the correct data", { + expect_equal(digest(as.integer(sum(answer2.5_A$mean_albumin))), "ade76ff25149e0df3a56010f12ea82fd") + }) + + print("Success!") +} + +test_2.5_B <- function(){ + test_that('Did not assign answer to an object called "answer2.5_B"', { + expect_true(exists("answer2.5_B")) + }) + + test_that('Solution should be "lower", "same", or "higher".', { + expect_match(answer2.5_B, 
"lower|same|higher", ignore.case = TRUE) + }) + + answer_hash <- digest(tolower(answer2.5_B)) + test_that("Solution is incorrect", { + expect_equal(answer_hash, "226b36da0f6c0424c28f826a4d22d8ac") + }) + + print("Success!") +} + +test_2.5_C <- function() { + test_that('Did not assign answer to an object called "answer2.5_C"', { + expect_true(exists("answer2.5_C")) + }) + + test_that('Solution should be "TRUE" or "FALSE"', { + expect_match(answer2.5_C, "true|false", ignore.case = TRUE) + }) + + answer_hash <- digest(tolower(answer2.5_C)) + if (answer_hash == "d2a90307aac5ae8d0ef58e2fe730d38b") { + print("Remember that we are assuming that we have the entire population.") + } + + test_that("Solution is incorrect", { + expect_equal(answer_hash, "05ca18b596514af73f6880309a21b5dd") + }) + + print("Success!") +} + +# + +## Question 2.6 + +test_2.6 <- function() { + test_that('Did not assign answer to an object called "answer2.6"', { + expect_true(exists("answer2.6")) + }) + + test_that('Solution should be a single character ("A", "B", "C", or "D")', { + expect_match(answer2.6, "a|b|c|d", ignore.case = TRUE) + }) + + answer_hash <- digest(tolower(answer2.6)) + if (answer_hash == "127a2ec00989b9f7faf671ed470be7f8") { + print("Remember that the median and the mean can be substantially different in asymmetric distributions.") + } else if (answer_hash == "6e7a8c1c098e8817e3df3fd1b21149d1") { + print("In general, the sample standard deviation will not be a good estimator for the populational mean.") + } else if (answer_hash == "d110f00cfb1b248e835137025804a23b") { + print("This is the value you want to compare your populational mean to. 
But first, you need to find a good quantity to estimate +your populational mean.") + } + + test_that("Solution is incorrect", { + expect_equal(answer_hash, "ddf100612805359cd81fdc5ce3b9fbba") + }) + + print("Success!") +} + +# + +## Question 2.7 + +test_2.7 <- function() { + test_that('Did not assign answer to an object called "samp_dist_mean_albumin"', { + expect_true(exists("samp_dist_mean_albumin")) + }) + + test_that("Solution should be a data frame", { + expect_true("data.frame" %in% class(samp_dist_mean_albumin)) + }) + + expected_colnames <- c("replicate", "stat") + given_colnames <- colnames(samp_dist_mean_albumin) + test_that("Data frame does not have the correct columns", { + expect_equal(length(setdiff( + union(expected_colnames, given_colnames), + intersect(expected_colnames, given_colnames) + )), 0) + }) + + test_that("Data frame does not contain the correct number of rows", { + expect_equal(digest(as.integer(nrow(samp_dist_mean_albumin))), "f1a30baa3072c1aad822c059f35c6841") + }) + + test_that("Data frame does not contain the correct data", { + expect_equal(digest(as.integer(sum(samp_dist_mean_albumin$stat))), "0ddfbd766f51d105f1ee88218aaafdb5") + }) + + print("Success!") +} + +# + +## Question 2.8 + +test_2.8 <- function() { + test_that('Did not assign answer to an object called "obs_test_stat"', { + expect_true(exists("obs_test_stat")) + }) + + answer_as_numeric <- as.numeric(obs_test_stat) + test_that("Solution should be a number", { + expect_false(is.na(answer_as_numeric)) + }) + + test_that("Solution is incorrect", { + expect_equal(digest(answer_as_numeric), "839554b82f517488c1c16c8e32454e31") + }) + + print("Success!") +} + +# + +## Question 2.9 + +test_2.9 <- function() { + test_that('Did not assign answer to an object called "samp_dist_mean_albumin_plot"', { + expect_true(exists("samp_dist_mean_albumin_plot")) + }) + + test_that("Solution should be a ggplot object", { + expect_true(is.ggplot(samp_dist_mean_albumin_plot)) + }) + + properties <- 
c(samp_dist_mean_albumin_plot$layers[[1]]$mapping, samp_dist_mean_albumin_plot$mapping) + + test_that("Plot should have stat on the x-axis", { + expect_true("stat" == rlang::get_expr(properties$x)) + }) + + test_that("Plot does not have the correct layers", { + expect_true("GeomBar" %in% class(samp_dist_mean_albumin_plot$layers[[1]]$geom)) + + # Remove if not needed: + expect_true("GeomVline" %in% class(samp_dist_mean_albumin_plot$layers[[2]]$geom)) + }) + + test_that("Plot does not use the correct data", { + expect_equal(digest(nrow(samp_dist_mean_albumin_plot$data)), "f1a30baa3072c1aad822c059f35c6841") + expect_equal(digest(round(sum(samp_dist_mean_albumin_plot$data$stat))), "df228a42984939c068b853b5dbbf3cc7") + }) + + test_that("x-axis label should be descriptive and human readable", { + expect_false(samp_dist_mean_albumin_plot$labels$x == toString(rlang::get_expr(properties$x))) + }) + + print("Success!") +} + +# + +## Question 2.10 + +test_2.10 <- function() { + test_that('Did not assign answer to an object called "null_model"', { + expect_true(exists("null_model")) + }) + + test_that("Solution should be a data frame", { + expect_true("data.frame" %in% class(null_model)) + }) + + expected_colnames <- c("replicate", "stat") + given_colnames <- colnames(null_model) + test_that("Data frame does not have the correct columns", { + expect_equal(length(setdiff( + union(expected_colnames, given_colnames), + intersect(expected_colnames, given_colnames) + )), 0) + }) + + test_that("Data frame does not contain the correct number of rows", { + expect_equal(digest(as.integer(nrow(null_model))), "f1a30baa3072c1aad822c059f35c6841") + }) + + test_that("Data frame does not contain the correct data", { + expect_equal(digest(as.integer(sum(null_model$stat)*1000)), "df711bac668d817e415d347b90b67019") + }) + + print("Success!") +} + +# + +## Question 2.11 + +test_2.11 <- function() { + test_that('Did not assign answer to an object called "null_model_plot"', { + 
expect_true(exists("null_model_plot")) + }) + + test_that("Solution should be a ggplot object", { + expect_true(is.ggplot(null_model_plot)) + }) + + properties <- c(null_model_plot$layers[[1]]$mapping, null_model_plot$mapping) + + test_that("Plot should have stat on the x-axis", { + expect_true("stat" == rlang::get_expr(properties$x)) + }) + + test_that("Plot does not have the correct layers", { + expect_true("GeomBar" %in% class(null_model_plot$layers[[1]]$geom)) + + # Remove if not needed: + expect_true("GeomVline" %in% class(null_model_plot$layers[[2]]$geom)) + }) + + test_that("Plot does not use the correct data", { + expect_equal(digest(nrow(null_model_plot$data)), "f1a30baa3072c1aad822c059f35c6841") + expect_equal(digest(round(sum(null_model_plot$data$stat))), "b803e9666348b8cc7fb63699366f3865") + + # If stat is not known: + # expect_equal(digest(round(sum(pull(null_model_plot$data, rlang::get_expr(properties$x))))), "HASH_HERE") + }) + + test_that("x-axis label should be descriptive and human readable", { + expect_false(null_model_plot$labels$x == toString(rlang::get_expr(properties$x))) + }) + + print("Success!") +} + +# + +## Question 2.12 + +test_2.12 <- function() { + test_that('Did not assign answer to an object called "p_value"', { + expect_true(exists("p_value")) + }) + + answer_as_numeric <- as.numeric(p_value) + test_that("Solution should be a number", { + expect_false(is.na(answer_as_numeric)) + }) + + test_that("Solution is incorrect", { + expect_equal(digest(as.integer(answer_as_numeric * 10000)), "20e70f4a08bdc6a54e53ad0a7d1498b6") + }) + + print("Success!") +} + +# + +## Question 2.13 + +test_2.13 <- function() { + test_that('Did not assign answer to an object called "answer2.13"', { + expect_true(exists("answer2.13")) + }) + + test_that('Solution should be a single character ("A", "B", "C", or "D")', { + expect_match(answer2.13, "a|b|c|d", ignore.case = TRUE) + }) + + answer_hash <- digest(tolower(answer2.13)) + if (answer_hash == 
"ddf100612805359cd81fdc5ce3b9fbba") { + print("Although there is a low probability. There's still a probability that under H0 you obtain a less or equal value + than the observed test statistic") + } else if (answer_hash == "d110f00cfb1b248e835137025804a23b") { + print("If you are confused about why this is not the correct answer, please ask the TAs or the instructor.") + } + + test_that("Solution is incorrect", { + expect_equal(answer_hash, "6e7a8c1c098e8817e3df3fd1b21149d1") + }) + + print("Success!") +} + +# + +## Question 2.14 + +test_2.14 <- function() { + test_that('Did not assign answer to an object called "answer2.14"', { + expect_true(exists("answer2.14")) + }) + + test_that('Solution should be a single character ("A", "B")', { + expect_match(answer2.14, "a|b", ignore.case = TRUE) + }) + + answer_hash <- digest(tolower(answer2.14)) + + test_that("Solution is incorrect", { + expect_equal(answer_hash, "127a2ec00989b9f7faf671ed470be7f8") + }) + + print("Success!") +} + +# + +## Question 2.15 + +test_2.15 <- function() { + test_that('Did not assign answer to an object called "answer2.15"', { + expect_true(exists("answer2.15")) + }) + + test_that('Solution should be a string of size between 1 and 4 containing only "A", "B", "C", "D", and "E")', { + expect_match(answer2.15, "^[aA|bB|cC|dD|eE]{1+}$") + }) + + test_that("Solution is incorrect", { + expect_equal(digest(tolower(answer2.15)), "0aa9c59ea893e51a8cc55e8ea353e592") + }) + + print("Success!") +} + +# + +## Question 2.16 + +test_2.16 <- function() { + test_that('Did not assign answer to an object called "null_model_infer"', { + expect_true(exists("null_model_infer")) + }) + + test_that("Solution should be a data frame", { + expect_true("data.frame" %in% class(null_model_infer)) + }) + + expected_colnames <- c("replicate", "stat") + given_colnames <- colnames(null_model_infer) + test_that("Data frame does not have the correct columns", { + expect_equal(length(setdiff( + union(expected_colnames, 
given_colnames), + intersect(expected_colnames, given_colnames) + )), 0) + }) + + test_that("Data frame does not contain the correct number of rows", { + expect_equal(digest(as.integer(nrow(null_model_infer))), "f1a30baa3072c1aad822c059f35c6841") + }) + + test_that("Data frame does not contain the correct data", { + expect_equal(digest(as.integer(sum(null_model_infer$stat))), "129297e2da029ee99482f60461ecbd31") + }) + + print("Success!") +} +# - + +test_2.17 <- function() { + test_that('Did not assign answer to an object called "null_model_vis_infer"', { + expect_true(exists("null_model_vis_infer")) + }) + + test_that("Solution should be a ggplot object", { + expect_true(is.ggplot(null_model_vis_infer)) + }) + + properties <- c(null_model_vis_infer$layers[[1]]$mapping, null_model_vis_infer$mapping) + + test_that("Plot should have stat on the x-axis", { + expect_true("stat" == rlang::get_expr(properties$x)) + }) + + test_that("Plot does not have the correct layers", { + expect_true("GeomBar" %in% class(null_model_vis_infer$layers[[1]]$geom)) + + # Remove if not needed: + #expect_true("GeomVline" %in% class(null_model_vis_infer$layers[[2]]$geom)) + }) + + test_that("Plot does not use the correct data", { + expect_equal(digest(nrow(null_model_vis_infer$data)), "f1a30baa3072c1aad822c059f35c6841") + expect_equal(digest(round(sum(null_model_vis_infer$data$stat))), "b803e9666348b8cc7fb63699366f3865") + }) + + test_that("x-axis label should be descriptive and human readable", { + expect_false(null_model_vis_infer$labels$x == toString(rlang::get_expr(properties$x))) + }) + + test_that("Plot should have a title", { + expect_true("title" %in% names(null_model_vis_infer$labels)) + }) + + print("Success!") +} + +# + +## Question 2.18 + +test_2.18 <- function() { + test_that('Did not assign answer to an object called "p_value_infer"', { + expect_true(exists("p_value_infer")) + }) + + answer_as_numeric <- as.numeric(p_value_infer) + test_that("Solution should be a number", { + 
expect_false(is.na(answer_as_numeric)) + }) + + test_that("Solution is incorrect", { + expect_equal(digest(as.integer(answer_as_numeric * 10e6)), "319413e7bdc61bf728528fe9eb02c0c8") + }) + + print("Success!") +} + +# + +## Question 3.1 + +test_3.1 <- function() { + test_that('Did not assign answer to an object called "answer3.1"', { + expect_true(exists("answer3.1")) + }) + + test_that('Solution should be a sequence of one up to four characters ("A", "B", "C", and/or "D")', { + expect_match(answer3.1, "^a?b?c?d?$", ignore.case = TRUE) + }) + + answer_hash <- digest(tolower(answer3.1)) + + test_that("Solution is incorrect", { + expect_equal(answer_hash, "7ecaefcc6ebc9848e7cb04b5c783ae0a") + }) + + print("Success!") +} + +# + +## Question 3.2 + +test_3.2 <- function() { + test_that('Did not assign answer to an object called "answer3.2"', { + expect_true(exists("answer3.2")) + }) + + test_that('Solution should be a single character ("A", "B", "C", or "D")', { + expect_match(answer3.2, "a|b|c|d", ignore.case = TRUE) + }) + + answer_hash <- digest(tolower(answer3.2)) + + test_that("Solution is incorrect", { + expect_equal(answer_hash, "ddf100612805359cd81fdc5ce3b9fbba") + }) + + print("Success!") +} + +# + +## Question 3.3 + +test_3.3 <- function() { + test_that('Did not assign answer to an object called "answer3.3"', { + expect_true(exists("answer3.3")) + }) + + test_that("Solution should be a data frame", { + expect_true("data.frame" %in% class(answer3.3)) + }) + + expected_colnames <- c("neighbourhood_name", "median") + given_colnames <- colnames(answer3.3) + test_that("Data frame does not have the correct columns", { + expect_equal(length(setdiff( + union(expected_colnames, given_colnames), + intersect(expected_colnames, given_colnames) + )), 0) + }) + + test_that("Data frame does not contain the correct number of rows", { + expect_equal(digest(as.integer(nrow(answer3.3))), "c01f179e4b57ab8bd9de309e6d576c48") + }) + + test_that("Data frame does not contain the 
correct data", { + expect_equal(digest(as.integer(sum(answer3.3$median))), "b225fb1495dbb5e5dfb3e327dceb7ab2") + }) + + print("Success!") +} + +# + +## Question 3.4 + +test_3.4 <- function() { + test_that('Did not assign answer to an object called "obs_med_diam_diff"', { + expect_true(exists("obs_med_diam_diff")) + }) + + answer_as_numeric <- as.numeric(obs_med_diam_diff) + test_that("Solution should be a number", { + expect_false(is.na(answer_as_numeric)) + }) + + test_that("Solution is incorrect", { + expect_equal(digest(as.integer(answer_as_numeric)), "515231903becc1906b20d1cade17fd44") + }) + + print("Success!") +} + +# + +## Question 3.5 + +test_3.5 <- function() { + test_that('Did not assign answer to an object called "null_model_trees"', { + expect_true(exists("null_model_trees")) + }) + + test_that("Solution should be a data frame", { + expect_true("data.frame" %in% class(null_model_trees)) + }) + + expected_colnames <- c("replicate", "stat") + given_colnames <- colnames(null_model_trees) + test_that("Data frame does not have the correct columns", { + expect_equal(length(setdiff( + union(expected_colnames, given_colnames), + intersect(expected_colnames, given_colnames) + )), 0) + }) + + test_that("Data frame does not contain the correct number of rows", { + expect_equal(digest(as.integer(nrow(null_model_trees))), "189e2f1b2fbb3743811990e9708c226a") + }) + + test_that("Data frame does not contain the correct data", { + expect_equal(digest(as.integer(sum(null_model_trees$stat))), "55f763ae917b48ca04293acf55b1cfde") + }) + + print("Success!") +} + +# + +## Question 3.6 + +test_3.6 <- function() { + test_that('Did not assign answer to an object called "trees_result_plot"', { + expect_true(exists("trees_result_plot")) + }) + + test_that("Solution should be a ggplot object", { + expect_true(is.ggplot(trees_result_plot)) + }) + + properties <- c(trees_result_plot$layers[[1]]$mapping, trees_result_plot$mapping) + + test_that("Plot should have stat on the x-axis", { 
+ expect_true("stat" == rlang::get_expr(properties$x)) + }) + + test_that("Plot does not have the correct layers", { + expect_true("GeomBar" %in% class(trees_result_plot$layers[[1]]$geom)) + }) + + test_that("Plot does not use the correct data", { + expect_equal(digest(nrow(trees_result_plot$data)), "189e2f1b2fbb3743811990e9708c226a") + expect_equal(digest(round(sum(trees_result_plot$data$stat))), "0dca766ef4554c1915c0cbf3d7d78fcd") + + # If stat is not known: + # expect_equal(digest(round(sum(pull(trees_result_plot$data, rlang::get_expr(properties$x))))), "HASH_HERE") + }) + + test_that("x-axis label should be descriptive and human readable", { + expect_false(trees_result_plot$labels$x == toString(rlang::get_expr(properties$x))) + }) + + test_that("Plot should have a title", { + expect_true("title" %in% names(trees_result_plot$labels)) + }) + + print("Success!") +} + +# + +## Question 3.7 + +test_3.7 <- function() { + test_that('Did not assign answer to an object called "answer3.7"', { + expect_true(exists("answer3.7")) + }) + + answer_as_numeric <- as.numeric(answer3.7) + test_that("Solution should be a number", { + expect_false(is.na(answer_as_numeric)) + }) + + test_that("Solution is incorrect", { + expect_equal(digest(as.integer(answer_as_numeric * 1000)), "e0d4379f8d0680cb869f2438980de3e8") + }) + + print("Success!") +} + +# + +## Question 3.8 + +test_3.8 <- function() { + test_that('Did not assign answer to an object called "answer3.8"', { + expect_true(exists("answer3.8")) + }) + + test_that('Solution should be a single character ("A", "B", "C", "D", "E", or "F")', { + expect_match(answer3.8, "a|b|c|d|e|f", ignore.case = TRUE) + }) + + answer_hash <- digest(tolower(answer3.8)) + + test_that("Solution is incorrect", { + expect_equal(answer_hash, "93a9078c6326f37b481d3e99b60ad987") + }) + + print("Success!") +} +# - + +## Question 4.1 +test_4.1 <- function() { + test_that('Please skip this question, we will do them in tutorial_10', { + expect_true(1==1) + }) 
+ + print("Please skip this question, we will do them in tutorial_10!") +} + +# + +## Question 4.2 + +test_4.2 <- function() { + test_that('Please skip this question, we will do them in tutorial_10', { + expect_true(1==1) + }) + + print("Please skip this question, we will do them in tutorial_10!") +} + +# + +## Question 4.3 + +test_4.3A <- function() { + test_that('Please skip this question, we will do them in tutorial_10', { + expect_true(1==1) + }) + + print("Please skip this question, we will do them in tutorial_10!") +} +test_4.3B <- function() { + test_that('Please skip this question, we will do them in tutorial_10', { + expect_true(1==1) + }) + + print("Please skip this question, we will do them in tutorial_10!") +} + +# + +## Question 4.4 + +test_4.4A <- function() { + test_that('Please skip this question, we will do them in tutorial_10', { + expect_true(1==1) + }) + + print("Please skip this question, we will do them in tutorial_10!") +} + +test_4.4B <- function() { + test_that('Please skip this question, we will do them in tutorial_10', { + expect_true(1==1) + }) + + print("Please skip this question, we will do them in tutorial_10!") +} +# - + +## Question 4.5 +test_4.5 <- function() { + test_that('Please skip this question, we will do them in tutorial_10', { + expect_true(1==1) + }) + + print("Please skip this question, we will do them in tutorial_10!") +} + +# + +## Question 4.6 + +## Question 4.6 + +test_4.6 <- function() { + test_that('Please skip this question, we will do them in tutorial_10', { + expect_true(1==1) + }) + + print("Please skip this question, we will do them in tutorial_10!") +} diff --git a/materials/worksheet_06/worksheet_06.ipynb b/materials/worksheet_06/worksheet_06.ipynb new file mode 100644 index 0000000..8edc45b --- /dev/null +++ b/materials/worksheet_06/worksheet_06.ipynb @@ -0,0 +1,2717 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": 
"markdown", + "checksum": "56cd85e38fd4d800970e38fcf8b79d0a", + "grade": false, + "grade_id": "cell-c386201f5323a017", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "# Worksheet 6: Hypothesis Testing" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "8e9666b868267e66cb284b7acd67321c", + "grade": false, + "grade_id": "cell-63551cb2c79b3c82", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "### Lecture and Tutorial Learning Goals\n", + "After completing this week's lecture and tutorial work, you will be able to:\n", + "\n", + "1. Give an example of a question you could answer with a hypothesis test.\n", + "2. Differentiate composite vs. simple hypotheses.\n", + "3. Given an inferential question, formulate null and alternative hypotheses to be used in a hypothesis test.\n", + "4. Identify the steps and components of a basic hypothesis test (\"there is only one hypothesis test\").\n", + "5. Write computer scripts to perform hypothesis testing via simulation, randomization and bootstrapping approaches, as well as interpret the output.\n", + "6. Describe the relationship between confidence intervals and hypothesis testing.\n", + "7. Discuss the potential limitations of this simulation approach to hypothesis testing." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "4ea741d00f82a7c8c4f2489b97318e54", + "grade": false, + "grade_id": "cell-e4ddf503dcc46d63", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Run this cell before continuing.\n", + "library(cowplot)\n", + "library(digest)\n", + "library(gridExtra)\n", + "library(infer)\n", + "library(repr)\n", + "library(tidyverse)\n", + "library(datateachr)\n", + "source(\"tests_worksheet_06.R\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "a95d2f88b97fe2b4e9015b6fbd10654d", + "grade": false, + "grade_id": "cell-6d14c502fafa7a1c", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "## 1. Short Recap & Warm-Up Question\n", + "\n", + "The hypothesis testing problem is very similar to the confidence intervals problem you learned in Week 4. There is just a shift in focus. For confidence intervals, we want to find plausible values for the parameter given a sample. In hypothesis testing, we want to find \"plausible\" values for a statistic given a fixed value for the parameter. For example, given a sample average $\\bar{x}=2$, confidence intervals aim to find plausible values for the populational mean $\\mu$. On the other hand, hypothesis tests assume a population parameter, say $\\mu=2$, and aims to check if the obtained $\\bar{x}$ is \"compatible\" with that value.\n", + "\n", + "Before we start, let us refresh our memory on confidence intervals." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "71c16e8c50907230b4005c6256ac7f25", + "grade": false, + "grade_id": "cell-14b123c1602669dc", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 1.1**\n", + "
{points: 1}\n", + "\n", + "When calculating a confidence interval, we are looking to find plausible values for a:\n", + "\n", + "A. statistic;\n", + "\n", + "B. parameter;\n", + "\n", + "C. observations in the sample;\n", + "\n", + "D. observations in the population;\n", + "\n", + "_Assign your answer to an object called `answer1.1`. Your response should be a single character surrounded by quotes._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "a119b5bb14ac40b29c509f956372643e", + "grade": false, + "grade_id": "cell-3bb33029a7df43f9", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# answer1.1 <- ...\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer1.1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "3f1f4d61fa3e9b34653215451bf6822e", + "grade": true, + "grade_id": "cell-43a9de8cf24eae44", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_1.1()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "fc64595af704cd06a2cdbf2a6f11b2d9", + "grade": false, + "grade_id": "cell-710e857eb0dc8ddc", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 1.2**\n", + "
{points: 1}\n", + "\n", + "Consider the population of all UBC students in a given year. We want the proportion of students that have at least one car. \n", + "\n", + "Complete the sentences below using one of the following two options:\n", + "\n", + "- `A` constant\n", + "- `B` random\n", + "\n", + "--------------------\n", + "\n", + "Before we take a sample:\n", + "\n", + "1. The elements of the sample are ...\n", + "2. The sample proportion is ...\n", + "3. The sample standard error is ...\n", + "4. The boundaries of a confidence interval are ...\n", + "5. The parameter $p$ is ...\n", + "\n", + "After we take the sample:\n", + "\n", + "6. The elements of the sample are ...\n", + "7. The sample proportion is ...\n", + "8. The sample standard error is ...\n", + "9. The boundaries of a confidence interval are ...\n", + "10. The parameter $p$ is ...\n", + "11. The elements of bootstrap samples are ...\n", + "\n", + "\n", + "\n", + "Your answer should be a string containing the letters associated with the terms in the same order as the sentences they complete. 
For example, one potential solution is \"AAABBAAABB\".\n", + "\n", + "_Assign your answer to an object called `answer1.2`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "7d41d3050c35a34f204b831f42a1e01d", + "grade": false, + "grade_id": "cell-463a78463ef1620a", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# answer1.2 <- ...\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer1.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "284502e0e3eef5a3c75468ce95d96b20", + "grade": true, + "grade_id": "cell-5a41cab3328cd70e", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_1.2()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "120f5ddec97ad1ee867dab5674c9e7ee", + "grade": false, + "grade_id": "cell-8d19530a56cce591", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "## 2. Introduction to Hypothesis Testing\n", + "\n", + "To introduce the idea of hypothesis testing, let us consider the Hepatitis C Virus (HCV) dataset. HCV is a virus that damages the liver. The HCV dataset contains several measurements obtained from blood tests at different stages of the disease, which are, in increasing order of severity: (1) No-Fibrosis; (2) Fibrosis; and (3) Cirrhosis. 
Let us take a look at the dataset first:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "7f7e15eabf398c8a4d093fcbf45aea87", + "grade": false, + "grade_id": "cell-0a7267736a03a363", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "hcv_dataset <-\n", + " read_csv(\"data/hcv-data-set.csv\") %>% \n", + " mutate(category = fct_recode(category, \"No-Fibrosis\" = \"Hepatitis\")) %>% \n", + " filter(category != \"Blood Donor\") %>% \n", + " mutate(category = fct_drop(category))\n", + "\n", + "head(hcv_dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "ea9548745c618713a01a7a0753042839", + "grade": false, + "grade_id": "cell-f0f472705a72b9af", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Albumin is a protein produced by the liver. Since Hepatitis C causes liver damage, one might suspect that people with hepatitis C would have lower albumin levels than healthy people. Although this is quite reasonable, is there enough evidence to support this claim? The difference in albumin levels might be so big (or so small) that we could easily answer this question with a simple plot. In other cases, however, the answer is not entirely clear. \n", + "\n", + "The medical community has established that the average level of albumin in people with a healthy liver is `44g/L`.\n", + "In the next exercise, you will start investigating the level of albumin in patients carrying HCV. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "4f289c6017fcc03370fe9d88de792267", + "grade": false, + "grade_id": "cell-981e17ae7a6a3d1a", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.1**\n", + "
{points: 1}\n", + "\n", + "Plot the boxplots of the `albumin` level for each stage of the disease by filling in the scaffolding below. Let us also add a line to represent the level of albumin in people with a healthy liver.\n", + "\n", + "_Assign your plot to an object called `boxplots`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "064ecf7b21c727696768dbab5ea9a9d8", + "grade": false, + "grade_id": "cell-4cdcaa7f5d2844a1", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#boxplots <- \n", + "# ... %>% \n", + "# ggplot(aes(x = ..., y = albumin)) + \n", + "# geom_...() + \n", + "# ylab(\"albumin g/L\") +\n", + "# ...(...) +\n", + "# theme(text = element_text(size=25)) +\n", + "# geom_hline(yintercept=..., color=\"blue\")\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "boxplots" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "8868cd14f048ea08d7ea5e8fa200a6a0", + "grade": true, + "grade_id": "cell-142cda7b731649b9", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.1()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "db5e7189c107944c98763f5ba9694e3f", + "grade": false, + "grade_id": "cell-40cf0af5f074419f", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.2**\n", + "
{points: 1}\n", + "\n", + "Compare the boxplots of albumin levels in each group against the mean albumin level of people with healthy livers. Then, for each category, please select the statement you think is most suitable. \n", + "\n", + "Statements: \n", + "\n", + "a. The boxplot shows a sample distribution that is not compatible with a mean level of albumin of 44g/L. In other words, it would be very surprising (or unlucky!) to obtain such a sample distribution from a population with a mean level of albumin of 44g/L.\n", + "\n", + "b. The boxplot shows a sample distribution that is compatible with a mean level of albumin of 44g/L. \n", + "\n", + "c. The boxplot shows some indications that the sample does not come from a population with a mean albumin level of 44g/L. However, it is hard to tell if the distinction is due to the sampling variability or a real difference in the mean albumin level.\n", + "\n", + "_Assign the letter of the statement `\"a\"`, `\"b\"`, or `\"c\"`, for each of the groups in object `answer2.2`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "c2c625eff063dccb05c062e08072dbf5", + "grade": false, + "grade_id": "cell-dd814fa3f33bc284", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "answer2.2 <- NULL\n", + "#answer2.2['No-Fibrosis'] <- \n", + "#answer2.2['Fibrosis'] <- \n", + "#answer2.2['Cirrhosis'] <- \n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer2.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "38f3d8323869d7c006de1035ecde441c", + "grade": true, + "grade_id": "cell-ed48f83d6e0a09ee", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + 
"task": false + } + }, + "outputs": [], + "source": [ + "test_2.2()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "093842c13645e9c039b2e3d6916e4a96", + "grade": false, + "grade_id": "cell-a30f3c627bfe96a3", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Note that we are not trying to find the plausible values for the true mean of the albumin level for each category based on our sample (which would be a confidence interval). Instead, we are trying to see if the sample we have is compatible with a hypothetical scenario of interest: the categories have the same mean level of albumin as people with a healthy liver. In other words, would it be plausible to obtain the sample we got if the hypothetical scenario of interest was real? \n", + "\n", + "Hypothesis testing is like a counter-proof. We are not trying to prove that a hypothetical scenario is real. We are checking if there is enough evidence in our sample to contradict the hypothesis (i.e., our sample is \"too incompatible\" with such a hypothetical scenario).\n", + "\n", + "But what is a hypothesis precisely? A hypothesis is a statement about the population. Some examples of hypotheses:\n", + "\n", + "1. The population is normally distributed.\n", + "2. The population mean, $\\mu$, is equal to a specified value $\\mu_0$.\n", + "3. The population proportion, $p$, is higher than a specified value of $p_0$. \n", + "\n", + "Although a hypothesis can be more general (like Example 1 above), hypotheses frequently refer to a population parameter such as mean, proportion, or variance. A hypothesis test consists of two competing hypotheses: (1) $H_0$, the _null hypothesis_; and (2) $H_A$ (or $H_1$), the alternative hypothesis. The null hypothesis is generally the status quo, i.e., the hypothesis that no change has happened. 
It is assumed that $H_0$ and $H_A$ cover all the possible scenarios (this means that either $H_0$ or $H_A$ is true -- but we do not know which). " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "d7f501099596a8e97a1b532a575d83ab", + "grade": false, + "grade_id": "cell-adaf773afe499f58", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.3**\n", + "
{points: 1}\n", + "\n", + "In the HCV dataset, we are investigating if the liver damage caused by the Hepatitis C Virus will reduce the albumin level compared to people with a healthy liver. What is the _null hypothesis_ we are testing? Here, $\\mu$ denotes the true mean albumin level of people with Hepatitis C. \n", + "\n", + "A. $H_0: \\mu<44g/L$\n", + "\n", + "B. $H_0: \\mu=44g/L$\n", + "\n", + "C. $H_0: \\mu>44g/L$\n", + "\n", + "D. $H_0: \\mu\\neq44g/L$\n", + "\n", + "\n", + "_Assign your answer to an object called `answer2.3`. Your answer should be one of `\"A\"`, `\"B\"`, `\"C\"`, or `\"D\"`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "05a2f4c5b1205efdb43a6fdc767e6532", + "grade": false, + "grade_id": "cell-50c85bb29980ef39", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer2.3 <- \n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer2.3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "c52f3450cafe144623e66445901fe043", + "grade": true, + "grade_id": "cell-720a6790dabd7f53", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.3()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "e3fea7105f60005ea49470870c740ceb", + "grade": false, + "grade_id": "cell-f53bf5feaaa303da", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.4**\n", + "
{points: 1}\n", + "\n", + "In the HCV dataset, we are investigating if the liver damage caused by the Hepatitis C Virus will reduce the albumin level compared to people with a healthy liver. What is the _alternative hypothesis_? Here, $\\mu$ denotes the true mean albumin level of people with Hepatitis C. \n", + "\n", + "A. $H_A: \\mu<44g/L$\n", + "\n", + "B. $H_A: \\mu=44g/L$\n", + "\n", + "C. $H_A: \\mu>44g/L$\n", + "\n", + "D. $H_A: \\mu\\neq44g/L$\n", + "\n", + "\n", + "_Assign your answer to an object called `answer2.4`. Your answer should be one of `\"A\"`, `\"B\"`, `\"C\"`, or `\"D\"`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "73e209498abc6e1e329fdef80f30674d", + "grade": false, + "grade_id": "cell-36a6bff87314be9c", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer2.4 <- \n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer2.4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "4bc95c6816fd5a583425fde7ab58caf8", + "grade": true, + "grade_id": "cell-88ba1a92407fbf91", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.4()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "5839708c00c29bdd76a72583d203c08f", + "grade": false, + "grade_id": "cell-4eb9be4b25659fa5", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Now that you have your _null hypothesis_ and _alternative hypothesis_ defined, it is time to conduct the hypothesis test, i.e., to check if 
there is enough evidence in your data to say that the _null hypothesis_ is false. But first, we need to understand what is meant by \"enough evidence\". " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "603a65df139b26f017adbddb0504540f", + "grade": false, + "grade_id": "cell-b107f2eb2a6a1d81", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.5**\n", + "
{points: 3}\n", + "\n", + "For now, suppose that the `hcv_dataset` contains the entire population of patients with `Fibrosis`. Your job is to do three things: \n", + "\n", + "A. Fill in the code below to get the mean albumin level of patients with `Fibrosis`. \n", + "_Assign the result to an object called `answer2.5_A`_.\n", + "\n", + "B. Is the albumin level of patients with `Fibrosis` the same, lower, or higher than the albumin level of patients with healthy liver? _Assign the string \"lower\", \"same\", or \"higher\", to an object called `answer2.5_B`_.\n", + "\n", + "C. True or false: we can conclude with absolute certainty, just based on the mean value obtained in `Item A`, that the mean albumin level of patients with `Fibrosis` is lower than that of patients with a healthy liver. _Assign \"TRUE\" or \"FALSE\" to an object called `answer2.5_C`_ " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "e3ea2913794212dea1851013d4ee7043", + "grade": false, + "grade_id": "cell-400fb89f00feab05", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer2.5_A <- \n", + "# hcv_dataset %>% \n", + "# filter(category == ...) 
%>% \n", + "# summarise(mean_albumin = ...)\n", + "\n", + "#answer2.5_B <- ...\n", + "#answer2.5_C <- ...\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer2.5_A\n", + "answer2.5_B \n", + "answer2.5_C" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "eaa7731a8f97f8f8a493ce789f949874", + "grade": true, + "grade_id": "cell-ca750fa0213a3a9c", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.5_A()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "35014870b86eb04016d4aa2de8302d5c", + "grade": true, + "grade_id": "cell-b251443d674f48d8", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.5_B()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "f5ae293b3b850f6f006848273acb8c86", + "grade": true, + "grade_id": "cell-598a5727cd1cd67f", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.5_C()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "9b4a7d9781372b4da5b6f21870ff8752", + "grade": false, + "grade_id": "cell-44834540512a6784", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "However, contrary to the previous question's assumption, `hcv_dataset` **does not contain the entire population of patients with 
`Fibrosis`**. We do not have access to the entire population, so we cannot calculate the parameter of interest and compare it with the hypothesized value. The decision to reject or not reject $H_0$ will be based on a sample. The first thing we need to decide is the sample statistic that we will use to test the _null hypothesis_. This statistic is known as the _test statistic_. A test statistic is a point estimate/sample statistic formula used for hypothesis testing." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "2a857cc2abee42366c341a42f21bad4e", + "grade": false, + "grade_id": "cell-4ccb5e1f2ff272fb", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.6**\n", + "
{points: 3}\n", + "\n", + "Considering the two hypotheses defined in Questions 2.3 and 2.4, which of the statistics below is adequate to be used as the test statistic?\n", + "\n", + "A. the sample median $Q_2$.\n", + "\n", + "B. the sample mean $\\bar{x}$.\n", + "\n", + "C. the sample standard deviation: $s=\\sqrt{\\frac{1}{n-1}\\sum_{i=1}^n\\left(x_i-\\bar{x}\\right)^2}$\n", + "\n", + "D. the constant $\\mu_0 = 44$\n", + "\n", + "_Assign your answer to an object called `answer2.6`. Your answer should be a single character surrounded by quotes._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "75b26a2bd156d219b700f10a6c16083c", + "grade": false, + "grade_id": "cell-aa173f406c5f76bd", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# answer2.6 <- ...\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer2.6" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "ea802c9f6e9da6c107478490092f61b9", + "grade": true, + "grade_id": "cell-99a05ff3bf3d3010", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.6()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "757aa6da76296cba0b93e8159c545008", + "grade": false, + "grade_id": "cell-3eabc3b367db3a2d", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.7**\n", + "
{points: 3}\n", + "\n", + "A difficulty that arises is that the test statistic is dependent on the sample. Different samples provide different values for the test statistic. Therefore, we cannot just compare the test statistic's value with the hypothesized value of 44 g/L of albumin.\n", + "\n", + "In this exercise, you will obtain the bootstrapped sampling distribution of your test statistic using 10,000 replications. \n", + "\n", + "_Assign your answer to an object called `samp_dist_mean_albumin`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "4fe6542c9c8faadc6d4ee7e48d7321d5", + "grade": false, + "grade_id": "cell-2b596aa28283a4b5", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "set.seed(7) # Do not change this!\n", + "\n", + "fibrosis <- \n", + " hcv_dataset %>% \n", + " filter(category == \"Fibrosis\") \n", + "\n", + "#samp_dist_mean_albumin <- \n", + "# fibrosis %>% \n", + "# specify(response = ...) %>% \n", + "# generate(type = ..., reps = ...) 
%>% \n", + "# calculate(stat = ...)\n", + "\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "head(samp_dist_mean_albumin)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "c4419387a82f692fc070c387b685714d", + "grade": true, + "grade_id": "cell-e517af59db67cd28", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.7()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "f0d7dff9415d16a507579a159c5366df", + "grade": false, + "grade_id": "cell-534fe94c5ad08e89", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.8**\n", + "
{points: 1}\n", + "\n", + "Fill in the code below to obtain the observed test statistic.\n", + "\n", + "_Assign your answer to an object named `obs_test_stat`_." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "71295f32981777381dc949f706c72bde", + "grade": false, + "grade_id": "cell-0e2236a9137d0c08", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#obs_test_stat <- ...(fibrosis$albumin)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "obs_test_stat" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "168c7b3ca223611a0a74f250bc44fa1a", + "grade": true, + "grade_id": "cell-4cfc9bf55e3007fe", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.8()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "85f208fb2bc7a9d25026e0b6b8fa3d91", + "grade": false, + "grade_id": "cell-5c7d1b43c64e2140", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.9**\n", + "
{points: 3}\n", + "\n", + "Fill in the code below to plot the bootstrap sampling distribution you obtained in Question 2.7.\n", + "\n", + "_Assign your answer to an object called `samp_dist_mean_albumin_plot`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "edf77f759bd3d00423a437d3aa72c59e", + "grade": false, + "grade_id": "cell-ad2ae437dc251c1a", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#samp_dist_mean_albumin_plot <- \n", + "# ... %>% \n", + "# ggplot() + \n", + "# geom_...(aes(..), bins = 15, color=\"white\") +\n", + "# geom_vline(xintercept = obs_test_stat, color = \"red\", alpha=.3, lwd=2) + \n", + "# xlab(...) + \n", + "# theme(text = element_text(size=25)) + \n", + "# ggtitle(\"Bootstrapped sampling dist.\", subtitle = \"Mean albumin level \") +\n", + "# annotate(\"text\", x = 43.6, y = 2150, label = \"Observed test statistic\", color=\"red\", size=7)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "samp_dist_mean_albumin_plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "f2d2059290c04714b48e57d791bedad1", + "grade": true, + "grade_id": "cell-aa367db5ce3307ea", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.9()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "7e104ac7f1e1d74f4b882b6ce2321ff7", + "grade": false, + "grade_id": "cell-9b286f02e09aef4b", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.10: the Null 
model**\n", + "
{points: 3}\n", + "\n", + "The test statistic's sampling distribution under the _null hypothesis_ ($H_0$) is called the _null model_ or _null distribution_. Up to now, you have been studying the sampling distribution of a statistic using bootstrap simulation. This time you will use bootstrap to examine what the sampling distribution of your test statistic would look like if $H_0$ were true. \n", + "\n", + "The _null hypothesis_ states that the population mean is 44 g/L, which implies that, under $H_0$, the mean of the test statistic's sampling distribution is 44 g/L. Your job here is to obtain the _null model_ by recentering the sampling distribution stored in `samp_dist_mean_albumin` to 44 g/L.\n", + "\n", + "_Assign your answer to an object named `null_model`_." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "fe82ca800065832a333ca888e04896c5", + "grade": false, + "grade_id": "cell-4f5a2f488ea65f69", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#null_model <- \n", + " #samp_dist_mean_albumin %>% \n", + " #mutate(...)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "head(null_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "042b65cfdf821efcf38b80a11756db5e", + "grade": true, + "grade_id": "cell-3b4192310ddeb76c", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.10()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "ee136607aec214b13c3bce6629898045", + "grade": false, + "grade_id": 
"cell-0d4feaa2be943a8f", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.11: the Null model**\n", + "
{points: 3}\n", + "\n", + "Fill in the code below to plot the _null model_ you obtained in the previous question. Also, add a vertical line to the plot at the observed value of the test statistic. \n", + "\n", + "_Assign your answer to an object called `null_model_plot`_." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "5aaf7fc8da268d20a14ed525987a6eaa", + "grade": false, + "grade_id": "cell-0399578e172109d3", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#null_model_plot <-\n", + "# null_model %>% \n", + "# ggplot() +\n", + "# geom_...(..., bins = 15, color=\"white\") +\n", + "# geom_vline(xintercept = obs_test_stat, color = \"red\", alpha=.3, lwd=2) + \n", + "# xlab(\"Mean albumin level (g/L)\") + \n", + "# theme(text = element_text(size=25)) + \n", + "# ggtitle(\"Simulated null distribution\", subtitle = \"Mean albumin level\") +\n", + "# annotate(\"text\", x = 43.6, y = 2200, label = \"Observed test statistic\", color=\"red\", size=7)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "null_model_plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "d9ec21cceb2e91507861821a6fddf941", + "grade": true, + "grade_id": "cell-facc05c4de74cf40", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.11()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "4ac96035bef676f9e1e57de8aed82a44", + "grade": false, + "grade_id": "cell-2c1cb5c383d5a16d", + "locked": true, + "schema_version": 3, + "solution": false, + "task": 
false + } + }, + "source": [ + "**Question 2.12: p-value**\n", + "
{points: 3}\n", + "\n", + "Is the observed value of the test statistic a plausible value to be obtained if $H_0$ were true? To answer this question, you will calculate the probability of getting a value more \"extreme\" than the observed test statistic under the null distribution. This probability is called the _p-value_. \n", + "\n", + "_Assign your answer to an object called `p_value`_." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "8bbd500649d7695582bcb744cc8297e2", + "grade": false, + "grade_id": "cell-09c7dc2a6bccdfcd", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#p_value <- mean(null_model$stat < ...)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "p_value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "458568dfac5fa9344145213611cdb1c5", + "grade": true, + "grade_id": "cell-155a227436cbec99", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.12()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "64322ca009f49a8d3e11bc13b847d134", + "grade": false, + "grade_id": "cell-639f0185ddda5751", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.13: Decision**\n", + "
{points: 3}\n", + "\n", + "Based on the _p-value_ you got in the previous question, which of the following do you think better describes the situation?\n", + "\n", + "A. The low value of the p-value shows that it is entirely plausible to obtain the observed test statistic if $H_0$ were true. Therefore, $H_0$ should not be rejected.\n", + "\n", + "B. The low value of the p-value shows that it is quite unlikely to get the observed test statistic if $H_0$ were true, which _certainly_ shows that $H_0$ is false. Therefore, $H_0$ should be rejected. \n", + "\n", + "C. The low value of the p-value shows that it is quite unlikely to get the observed test statistic if $H_0$ were true, which _suggests_ that $H_0$ is false. Therefore, $H_0$ should be rejected. \n", + "\n", + "D. The _p-value_ is quite low, and since the _p-value_ is the probability that $H_0$ is true, we should reject $H_0$. \n", + "\n", + "_Assign your answer to an object called `answer2.13`. Your answer should be a single character surrounded by quotes._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "ba237dd158c20ff3055577fc46b331be", + "grade": false, + "grade_id": "cell-20732de06b1c49f8", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer2.13 <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer2.13" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "c3a269856dd85d7a1391676b61022525", + "grade": true, + "grade_id": "cell-326c1016109197da", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.13()" + ] + }, + { + "cell_type": "markdown", + 
"metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "41bace3bc3f2fac5dfb1b02c6fcfb422", + "grade": false, + "grade_id": "cell-e98cd4facee6b769", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.14: Types of Error**\n", + "
{points: 3}\n", + "\n", + "There are two possible errors we could make in hypothesis testing:\n", + "\n", + "1. Type I Error: happens when we wrongly reject $H_0$ (i.e., we reject $H_0$ when $H_0$ is true);\n", + "2. Type II Error: happens when we wrongly do not reject $H_0$ (i.e., we do not reject $H_0$ when $H_0$ is false);\n", + "\n", + "| Decision | Truth: $H_0$ is true | Truth: $H_A$ is true |\n", + "|:--|:-:|:-:|\n", + "| Reject $H_0$ | Type I error | Correct decision |\n", + "| Do not reject $H_0$ | Correct decision | Type II error |\n", + "\n", + "Considering the decision you made in Question 2.13, which type of error are you at risk of making? \n", + "\n", + "A. Type I Error\n", + "\n", + "B. Type II Error\n", + "\n", + "_Assign your answer to an object called `answer2.14`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "7922cdcfe05c5018a5aef32824611f6e", + "grade": false, + "grade_id": "cell-dfac9b69c95ae6c4", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer2.14 <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer2.14" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "420f99336df7ca5bd284c515f61ab21e", + "grade": true, + "grade_id": "cell-d7f3be238ddfef27", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.14()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "042f165bd70b22f933eb1a9a663811be", + "grade": false, + "grade_id": "cell-3ba3a59fbc0569b3", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.15: Significance level**\n", + "
{points: 3}\n", + "\n", + "We know that a low _p-value_ is evidence against $H_0$. But how low must the _p-value_ be for us to decide to reject $H_0$? \n", + "\n", + "When performing hypothesis testing, we must set the so-called _significance level_. The significance level, $\\alpha$, is the probability of _Type I Error_. We will reject $H_0$ if the p-value is smaller than the significance level we chose. Typical values of $\\alpha$ are $10\\%, 5\\%$, and $1\\%$. It is important that you specify the $\\alpha$ level before conducting the hypothesis test and obtaining the p-value. \n", + "\n", + "For this question, you must select all the significance levels below for which we would reject $H_0$:\n", + "\n", + "A. $10\\%$\n", + "\n", + "B. $5\\%$\n", + "\n", + "C. $1\\%$\n", + "\n", + "D. $0.1\\%$\n", + "\n", + "E. None of the above.\n", + "\n", + "Your answer should be a string containing the letters associated with the items you selected in the same order as the items appear. For example, if you want to select `B` and `D`, you should use `\"BD\"`, not `\"DB\"`. 
\n", + "\n", + "_Assign your answer to an object called `answer2.15`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "f23b6598313983a25156216ca528e150", + "grade": false, + "grade_id": "cell-f1c0bff98d2e9722", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer2.15 <- \n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer2.15" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "f628099b37831f99fde95c4f846a13a3", + "grade": true, + "grade_id": "cell-36a4f9055f2c66d9", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.15()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "05efadc38af1249e959b7c37fd53e8fa", + "grade": false, + "grade_id": "cell-025b3df4013a6d61", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.16: hypothesis testing with the `infer` package**\n", + "
{points: 3}\n", + "\n", + "The `infer` package has a similar [workflow to conduct hypothesis tests](https://moderndive.com/9-hypothesis-testing.html#ht-infer) to the one you have been using for confidence intervals. \n", + "\n", + "In this question, you will conduct the same hypothesis test you just did manually, but this time you are going to use the `infer` package. Fill in the code below to generate 10,000 bootstrap samples from the null model using the `infer` workflow. Then, try comparing the first ten rows of the model you manually generated `null_model` with the first ten rows you obtained using `infer`\n", + "\n", + "_Assign your answer to an object called `null_model_infer`_. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "ed0ce780ccbf94a405cd3f43cc4ebe2e", + "grade": false, + "grade_id": "cell-1dfa0051161db7c5", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "set.seed(7) # Do not change this.\n", + "\n", + "#null_model_infer <- \n", + "# fibrosis %>% \n", + "# specify(...) %>% \n", + "# hypothesise(...) %>% \n", + "# generate(...) 
%>% \n", + "# calculate(...)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "head(null_model_infer)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "4b540ce4f20353ef27cdb93aae6bf9a3", + "grade": true, + "grade_id": "cell-532a26a12c781427", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.16()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "220dc8d1c2fbbd2c23621103156deb60", + "grade": false, + "grade_id": "cell-e9fe9bed81d549c0", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.17: Visualizing the result of the hypothesis test `infer`**\n", + "
{points: 3}\n", + "\n", + "\n", + "The `infer` package also makes it easy to visualize the result of your hypothesis test with `visualize` and `shade_p_value` functions (see [Section 9.3.1 of Modern Dive](https://moderndive.com/9-hypothesis-testing.html#ht-infer)).\n", + "\n", + "Fill in the code below to visualize the result of your hypothesis test. \n", + "\n", + "_Assign the answer to an object named `null_model_vis_infer`_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "0fd38ffa47ecc4f4c83f1a48d8f092da", + "grade": false, + "grade_id": "cell-602979ab19235af9", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#null_model_vis_infer <-\n", + "# null_model_infer %>% \n", + "# visualize(...) + \n", + "# shade_p_value(obs_stat = ..., direction = ...) +\n", + "# xlab(\"Mean albumin level (g/L)\") + \n", + "# theme(text = element_text(size=20))\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "null_model_vis_infer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "95a07f3b584065836a17dc3fc9aee2fd", + "grade": true, + "grade_id": "cell-a9ca1ae0a796bb53", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.17()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "2ea5710e547295287691f2f641e8ae79", + "grade": false, + "grade_id": "cell-6c878d761af53f37", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.18: Getting the p-value with `infer`**\n", + "
{points: 1}\n", + "\n", + "To get the p-value with the `infer` package, we use the `get_p_value` function. Obtain the p-value using `null_model_infer`. \n", + "\n", + "_Assign your answer to an object called `p_value_infer`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "135442a48704995df10a0a1d3606b231", + "grade": false, + "grade_id": "cell-d8de10198a9915ac", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#p_value_infer <- \n", + "# null_model_infer %>% \n", + "# get_p_value(obs_stat = ..., direction = ...)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "p_value_infer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "9c6e32c4fa6ebb713b626a6d0d6e7230", + "grade": true, + "grade_id": "cell-878da68d39c876b0", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_2.18()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "b76a4eef7ff2f70dca19eaa51555e4cd", + "grade": false, + "grade_id": "cell-ea0a9087eace5aa7", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "## 3. Hypothesis testing with two populations\n", + "\n", + "In the previous section, the hypothesis involved only one parameter from one population: the mean level of albumin in patients with `Fibrosis`. \n", + "\n", + "In this section, you will work with two populations, and your hypothesis will involve a parameter from each population. 
Although it seems more complicated, fear not, the process is pretty much the same: \n", + "\n", + "1. Specify the variable of interest (`specify()`);\n", + "2. Define your hypotheses (`hypothesise()`);\n", + "3. Simulate the observations (`generate()`);\n", + "4. Generate values from the null model (`calculate()`);\n", + "5. See how the observed statistic compares with the sampling distribution by checking the _p_-value.\n", + "\n", + "In the following sequence of exercises, we will try to answer the following question: \n", + "\n", + "> Is the diameter of the trees in Kitsilano bigger than in Kerrisdale?" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "b931c90bb926263dcc01c49114fa5a29", + "grade": false, + "grade_id": "cell-0d0aedd671460715", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.1: What are we comparing exactly?**\n", + "
{points: 2}\n", + "\n", + "Different trees will have different diameters. We need a summary quantity to summarise each population: (1) all the trees in Kitsilano; and (2) all the trees in Kerrisdale. Which of the following quantities are appropriate to help answer the question (select all that apply):\n", + "\n", + "A. Population mean;\n", + "\n", + "B. Population variance;\n", + "\n", + "C. Population median;\n", + "\n", + "D. Population mode; \n", + "\n", + "_Assign your answer to an object called `answer3.1`. Your response should be a sequence of characters, e.g., \"ABCD\"._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "b8c3ad088d3edfac03247f11e313cbf7", + "grade": false, + "grade_id": "cell-76fa433852f954db", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer3.1 <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer3.1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "fbb9d68098890a95176c2dfb34631894", + "grade": true, + "grade_id": "cell-ba8c3c631688bd08", + "locked": true, + "points": 2, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_3.1()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "6ae106334094d0bc9881f2ceae1c72d0", + "grade": false, + "grade_id": "cell-326ab3e29761c7f2", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.2: Setting the hypothesis**\n", + "
{points: 2}\n", + "\n", + "If we were to use the median, say $m_1$ is the median diameter of the trees in Kitsilano and $m_2$ the median diameter of the trees in Kerrisdale, what are the appropriate hypotheses?\n", + "\n", + "A. $H_0: m_1-m_2 = 0$ vs $H_1: m_1-m_2 < 0$\n", + "\n", + "B. $H_0: m_1-m_2 = 0$ vs $H_1: m_1-m_2 > 0$\n", + "\n", + "C. $H_0: m_1-m_2 > 0$ vs $H_1: m_1-m_2 < 0$\n", + "\n", + "D. $H_0: m_1-m_2 < 0$ vs $H_1: m_1-m_2 = 0$\n", + "\n", + "_Assign your answer to an object called `answer3.2`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "dd53a47e682fe642108fdf35b09ec98f", + "grade": false, + "grade_id": "cell-3604ac09f4c211ec", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer3.2 <-\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer3.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "43d9f5dfda8017cb0305ccda9eda79e4", + "grade": true, + "grade_id": "cell-512cb2e17bb87a53", + "locked": true, + "points": 2, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_3.2()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "5ae67b7b0d53b01abd54e75ab47cccd0", + "grade": false, + "grade_id": "cell-f24659bc1cf72fe5", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.3: The truth!**\n", + "
{points: 3}\n", + "\n", + "Let us assume that the entire population of trees in Kitsilano and Kerrisdale is stored in the `trees_pop` variable. Let's find out the truth! \n", + "What is the median diameter of each population of trees? Fill in the code below to find out.\n", + "\n", + "_Assign your answer to an object called `answer3.3`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "32ff2edffff6ec3c3c2909ccfea27bd8", + "grade": false, + "grade_id": "cell-ac532d6c217b508a", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Run this code before continuing\n", + "trees_pop <-\n", + " vancouver_trees %>% \n", + " filter(neighbourhood_name %in% c(\"KITSILANO\", \"KERRISDALE\")) %>% \n", + " select(neighbourhood_name, diameter)\n", + "\n", + "head(trees_pop)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "d5db397a42ea31153233047aff11f04c", + "grade": false, + "grade_id": "cell-0e6359010285ebe4", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer3.3 <- \n", + "# trees_pop %>% \n", + "# ...(neighbourhood_name) %>% \n", + "# ...(median = ...)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer3.3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "98df68bf9e7cda69425016dadc07eac8", + "grade": true, + "grade_id": "cell-8321b4d518a311c9", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_3.3()" + ] + }, + { + 
"cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "a9a72629055139b6c57d7aa8094a5e33", + "grade": false, + "grade_id": "cell-9a765025a4ac8c86", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.4: Let's take a sample**\n", + "
{points: 3}\n", + "\n", + "A sample of 31 trees from Kitsilano and 35 trees from Kerrisdale was taken and stored in the object `sample_trees`. Calculate the observed test statistic as the difference in the median diameter between KITSILANO & KERRISDALE\n", + "\n", + "_Assign your answer to an object called `obs_med_diam_diff`_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "28d9e7aeec06f4880aeb5fdd781212b0", + "grade": false, + "grade_id": "cell-b0191856865c1d44", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "set.seed(12) # Do not change this\n", + "\n", + "# Run this cell before continuing.\n", + "sample_trees <- \n", + " trees_pop %>% \n", + " filter(neighbourhood_name==\"KERRISDALE\") %>% \n", + " sample_n(size=35) %>% \n", + " bind_rows(\n", + " trees_pop %>% \n", + " filter(neighbourhood_name==\"KITSILANO\") %>% \n", + " sample_n(size=31))\n", + "\n", + "head(sample_trees)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "524ce0d9bc8126de716a278bebe9c84f", + "grade": false, + "grade_id": "cell-1c5277cac7748862", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# obs_med_diam_diff <- \n", + "# sample_trees %>% \n", + "# filter(neighbourhood_name %in% c(..., ...)) %>%\n", + "# group_by(...) %>% \n", + "# summarise(median = ...) %>%\n", + "# pivot_wider(names_from = neighbourhood_name, values_from = median) %>%\n", + "# transmute(diff = ...) 
%>%\n", + "# pull(...)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "obs_med_diam_diff" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "1e01b5f699103dae864f1d55a8ee0017", + "grade": true, + "grade_id": "cell-eeb2724c79c477c6", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_3.4()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "a650a676546959942fccfabacbb5f66e", + "grade": false, + "grade_id": "cell-95e24759c31bc5eb", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.5: Simulating from the null distribution**\n", + "
{points: 3}\n", + "\n", + "Fill in the code below to generate 5000 samples from the null distribution.\n", + "\n", + "_Assign your answer to an object called `null_model_trees`_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "1cb6da7c48796f558bd534fd0ede7451", + "grade": false, + "grade_id": "cell-fe80212fccaf0f1f", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "set.seed(50)\n", + "\n", + "#null_model_trees <- \n", + "# sample_trees %>% \n", + "# specify(formula = ... ~ ...) %>% \n", + "# hypothesize(null = ...) %>% \n", + "# ...(reps = 5000, type = \"permute\") %>% \n", + "# ...(stat=\"diff in medians\", order = c(\"KITSILANO\", \"KERRISDALE\"))\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "head(null_model_trees)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "430e916eb178d575984c53bf9e903f46", + "grade": true, + "grade_id": "cell-784331f2e3adbe88", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_3.5()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "56464ea9c4bec8597d7ea6cccb50ee56", + "grade": false, + "grade_id": "cell-812681a3cd2d2834", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.6**\n", + "
{points: 3}\n", + "\n", + "Fill in the code below to plot the result of the hypothesis test. \n", + "\n", + "_Assign your answer to an object called `trees_result_plot`_." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "0d615da6eca63dcca51da04063aaafd5", + "grade": false, + "grade_id": "cell-523e9ef063ba782d", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#trees_result_plot <- \n", + "# null_model_trees %>%\n", + "# visualize() + \n", + "# shade_p_value(obs_stat = ..., direction = ...)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "trees_result_plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "37640a8dcda48398fc6628b60649fe58", + "grade": true, + "grade_id": "cell-b177015e5b570b86", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_3.6()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "9b3e2ca87d729b78738d795beeae8122", + "grade": false, + "grade_id": "cell-c8e9e6051344a05e", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.7**\n", + "
{points: 3}\n", + "\n", + "Obtain the p-value from `null_model_trees`.\n", + "\n", + "_Assign your answer to an object called `answer3.7`_." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "914a2f7c711aac912e4af9a6c456c389", + "grade": false, + "grade_id": "cell-c4f1639bbbe64d41", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer3.7 <- \n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer3.7" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "abb3a2223d934faed270c84726c9fb24", + "grade": true, + "grade_id": "cell-ede0354b1285433c", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "test_3.7()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Question 3.8**\n", + "
{points: 3}\n", + "\n", + "What decision should be made at the 10% significance level?\n", + "\n", + "A. Reject $H_0$ and commit a Type I Error;\n", + "\n", + "B. Reject $H_0$ and commit a Type II Error;\n", + "\n", + "C. Correctly reject $H_0$;\n", + "\n", + "D. Not reject $H_0$ and commit a Type I Error;\n", + "\n", + "E. Not reject $H_0$ and commit a Type II Error;\n", + "\n", + "F. Correctly not reject $H_0$;\n", + "\n", + "_Assign your answer to an object called `answer3.8`._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "3fa99601e48343140a4e1bd300ec6021", + "grade": false, + "grade_id": "cell-cb42623aee11340d", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#answer3.8 <- \n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "\n", + "answer3.8" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "3b6dd65634557f32915ee6bcd9b55ef2", + "grade": true, + "grade_id": "cell-68c4131a9c491e83", + "locked": true, + "points": 3, + "schema_version": 3, + "solution": false, + "task": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "test_3.8()" + ] + } + ], + "metadata": { + "docker": { + "latest_image_tag": "v0.4.0" + }, + "jupytext": { + "formats": "ipynb,Rmd" + }, + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "4.2.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}