diff --git a/.nojekyll b/.nojekyll index fec702f..d7bdd33 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -50e3d627 \ No newline at end of file +a4dd7030 \ No newline at end of file diff --git a/schedule/slides/17-nonlinear-classifiers.html b/schedule/slides/17-nonlinear-classifiers.html index b056529..f8a4a0c 100644 --- a/schedule/slides/17-nonlinear-classifiers.html +++ b/schedule/slides/17-nonlinear-classifiers.html @@ -397,7 +397,7 @@

17 Nonlinear classifiers

Stat 406

Daniel J. McDonald

-

Last modified – 26 October 2023

+

Last modified – 30 October 2023

\[ \DeclareMathOperator*{\argmin}{argmin} \DeclareMathOperator*{\argmax}{argmax} @@ -716,6 +716,32 @@

knn.cv() (leave one out)

I would use the largest (odd) k that is close to the minimum.
This produces simpler, smoother, decision boundaries.

+
+

Alternative (using deviance loss, I think this is right)

+
+
+Code +
dev <- function(y, prob, prob_min = 1e-5) { # per-observation 2 * (log-lik under constant rate mean(y) - log-lik under prob); lower means prob fits better
+  y <- as.numeric(as.factor(y)) - 1 # 0/1 valued, assuming y has exactly two classes -- TODO confirm
+  m <- mean(y) # share of observations coded 1
+  prob_max <- 1 - prob_min
+  prob <- pmin(pmax(prob, prob_min), prob_max) # clamp to [prob_min, prob_max] so both logs on the next line stay finite
+  lp <- (1 - y) * log(1 - prob) + y * log(prob) # Bernoulli log-likelihood of each y under prob
+  ly <- (1 - y) * log(1 - m) + y * log(m) # same under the constant m; NOTE(review): NaN when m is 0 or 1 (0 * log(0))
+  2 * (ly - lp) # vector with one value per observation
+}
+knn.cv_probs <- function(train, cl, k = 1) { # turns the "prob" attribute of knn.cv() into a value for the class coded 1
+  o <- knn.cv(train, cl, k = k, prob = TRUE) # predictions; prob = TRUE requests the "prob" attribute read below
+  p <- attr(o, "prob") # presumably the vote share of the predicted class (class::knn.cv) -- TODO confirm
+  o <- as.numeric(as.factor(o)) - 1 # recode predictions as 0/1, same recoding dev() applies to y
+  p[o == 0] <- 1 - p[o == 0] # where the prediction is coded 0, replace p by its complement
+  p
+}
+dev_err <- map_dbl(1:kmax, ~ mean(dev(dat1$y, knn.cv_probs(dat1[, -1], dat1$y, k = .x)))) # one mean dev() value per k = 1, ..., kmax; kmax and dat1 come from outside this chunk
+
+
+ +

Final version

@@ -723,26 +749,26 @@

Final version

Code -
kopt <- max(which(err == min(err)))
-kopt <- kopt + 1 * (kopt %% 2 == 0)
-gr$opt <- knn(dat1[, -1], gr[, 1:2], dat1$y, k = kopt)
-tt <- table(knn(dat1[, -1], dat1[, -1], dat1$y, k = kopt), dat1$y, dnn = c("predicted", "truth"))
-ggplot(dat1, aes(x1, x2)) +
-  theme_bw(base_size = 24) +
-  scale_shape_manual(values = c("0", "1"), guide = "none") +
-  geom_raster(data = gr, aes(x1, x2, fill = opt), alpha = .6) +
-  geom_point(aes(shape = y), size = 4) +
-  coord_cartesian(c(-2.5, 3), c(-2.5, 3)) +
-  scale_fill_manual(values = c(orange, green), labels = c("0", "1")) +
-  theme(
-    legend.position = "bottom", legend.title = element_blank(),
-    legend.key.width = unit(2, "cm")
-  )
+
kopt <- max(which(err == min(err)))
+kopt <- kopt + 1 * (kopt %% 2 == 0)
+gr$opt <- knn(dat1[, -1], gr[, 1:2], dat1$y, k = kopt)
+tt <- table(knn(dat1[, -1], dat1[, -1], dat1$y, k = kopt), dat1$y, dnn = c("predicted", "truth"))
+ggplot(dat1, aes(x1, x2)) +
+  theme_bw(base_size = 24) +
+  scale_shape_manual(values = c("0", "1"), guide = "none") +
+  geom_raster(data = gr, aes(x1, x2, fill = opt), alpha = .6) +
+  geom_point(aes(shape = y), size = 4) +
+  coord_cartesian(c(-2.5, 3), c(-2.5, 3)) +
+  scale_fill_manual(values = c(orange, green), labels = c("0", "1")) +
+  theme(
+    legend.position = "bottom", legend.title = element_blank(),
+    legend.key.width = unit(2, "cm")
+  )
-

+

diff --git a/schedule/slides/17-nonlinear-classifiers_files/figure-revealjs/unnamed-chunk-6-1.svg b/schedule/slides/17-nonlinear-classifiers_files/figure-revealjs/unnamed-chunk-6-1.svg index 22a006e..a6c8754 100644 --- a/schedule/slides/17-nonlinear-classifiers_files/figure-revealjs/unnamed-chunk-6-1.svg +++ b/schedule/slides/17-nonlinear-classifiers_files/figure-revealjs/unnamed-chunk-6-1.svg @@ -175,27 +175,27 @@ - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + diff --git a/schedule/slides/17-nonlinear-classifiers_files/figure-revealjs/unnamed-chunk-8-1.svg b/schedule/slides/17-nonlinear-classifiers_files/figure-revealjs/unnamed-chunk-8-1.svg new file mode 100644 index 0000000..3cd3bff --- /dev/null +++ b/schedule/slides/17-nonlinear-classifiers_files/figure-revealjs/unnamed-chunk-8-1.svg @@ -0,0 +1,226 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/schedule/slides/17-nonlinear-classifiers_files/figure-revealjs/unnamed-chunk-9-1.svg b/schedule/slides/17-nonlinear-classifiers_files/figure-revealjs/unnamed-chunk-9-1.svg new file mode 100644 index 0000000..1f0f880 --- /dev/null +++ b/schedule/slides/17-nonlinear-classifiers_files/figure-revealjs/unnamed-chunk-9-1.svg @@ -0,0 +1,589 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/schedule/slides/18-the-bootstrap.html b/schedule/slides/18-the-bootstrap.html index dec905f..38f8b18 100644 --- a/schedule/slides/18-the-bootstrap.html +++ b/schedule/slides/18-the-bootstrap.html @@ -397,7 +397,7 @@

18 The bootstrap

Stat 406

Daniel J. McDonald

-

Last modified – 11 October 2023

+

Last modified – 30 October 2023

\[ \DeclareMathOperator*{\argmin}{argmin} \DeclareMathOperator*{\argmax}{argmax} @@ -493,10 +493,29 @@

Bootstrap procedure

\(\hat{F}\) is the “empirical” distribution of the bootstraps.

+
+

Empirical distribution

+
+
+Code +
r <- rexp(50, 1 / 5) # 50 exponential draws with rate 1/5
+ggplot(tibble(r = r), aes(r)) + 
+  stat_ecdf(colour = orange) + # empirical CDF of the draws
+  geom_vline(xintercept = quantile(r, probs = c(.05, .95))) + # sample 5% and 95% quantiles of r
+  geom_hline(yintercept = c(.05, .95), linetype = "dashed") + # the matching probability levels
+  annotate(
+    "label", x = c(5, 12), y = c(.25, .75), 
+    label = c("hat(F)[boot](.05)", "hat(F)[boot](.95)"), 
+    parse = TRUE # label strings are parsed as plotmath expressions rather than shown literally
+  )
+
+ +
+

Very basic example

\[\frac{\sqrt{n}(\bar{X}-E[X])}{s} \approx N(0, 1).\]

@@ -510,19 +529,19 @@

Code -
ggplot(data.frame(x = c(0, 12)), aes(x)) +
-  stat_function(fun = function(x) dexp(x, 1 / 5), color = orange) +
-  geom_vline(xintercept = 5, color = blue) + # mean
-  geom_vline(xintercept = qexp(.5, 1 / 5), color = red) + # median
-  annotate("label",
-    x = c(2.5, 5.5, 10), y = c(.15, .15, .05),
-    label = c("median", "bar(x)", "pdf"), parse = TRUE,
-    color = c(red, blue, orange), size = 6
-  )
+
ggplot(data.frame(x = c(0, 12)), aes(x)) +
+  stat_function(fun = function(x) dexp(x, 1 / 5), color = orange) +
+  geom_vline(xintercept = 5, color = blue) + # mean
+  geom_vline(xintercept = qexp(.5, 1 / 5), color = red) + # median
+  annotate("label",
+    x = c(2.5, 5.5, 10), y = c(.15, .15, .05),
+    label = c("median", "bar(x)", "pdf"), parse = TRUE,
+    color = c(red, blue, orange), size = 6
+  )
-
+

Now what…

-
set.seed(406406406)
-x <- rexp(n, 1 / 5)
-(med <- median(x)) # sample median
+
set.seed(406406406)
+x <- rexp(n, 1 / 5)
+(med <- median(x)) # sample median
[1] 3.611615
-
B <- 100
-alpha <- 0.05
-Fhat <- map_dbl(1:B, ~ median(sample(x, replace = TRUE))) # repeat B times, "empirical distribution"
-CI <- 2 * med - quantile(Fhat, probs = c(1 - alpha / 2, alpha / 2))
+
B <- 100
+alpha <- 0.05
+Fhat <- map_dbl(1:B, ~ median(sample(x, replace = TRUE))) # repeat B times, "empirical distribution"
+CI <- 2 * med - quantile(Fhat, probs = c(1 - alpha / 2, alpha / 2))
@@ -548,23 +567,23 @@

Code -
ggplot(data.frame(Fhat), aes(Fhat)) +
-  geom_density(color = orange) +
-  geom_vline(xintercept = CI, color = orange, linetype = 2) +
-  geom_vline(xintercept = med, col = blue) +
-  geom_vline(xintercept = qexp(.5, 1 / 5), col = red) +
-  annotate("label",
-    x = c(3.15, 3.5, 3.75), y = c(.5, .5, 1),
-    color = c(orange, red, blue),
-    label = c("widehat(F)", "true~median", "widehat(median)"),
-    parse = TRUE
-  ) +
-  xlab("x") +
-  geom_rug(aes(2 * med - Fhat))
+
ggplot(data.frame(Fhat), aes(Fhat)) +
+  geom_density(color = orange) +
+  geom_vline(xintercept = CI, color = orange, linetype = 2) +
+  geom_vline(xintercept = med, col = blue) +
+  geom_vline(xintercept = qexp(.5, 1 / 5), col = red) +
+  annotate("label",
+    x = c(3.15, 3.5, 3.75), y = c(.5, .5, 1),
+    color = c(orange, red, blue),
+    label = c("widehat(F)", "true~median", "widehat(median)"),
+    parse = TRUE
+  ) +
+  xlab("x") +
+  geom_rug(aes(2 * med - Fhat))
-
+

How does this work?

@@ -578,14 +597,14 @@

Slightly harder example

-
ggplot(fatcats, aes(Bwt, Hwt)) +
-  geom_point(color = blue) +
-  xlab("Cat body weight (Kg)") +
-  ylab("Cat heart weight (g)")
+
ggplot(fatcats, aes(Bwt, Hwt)) +
+  geom_point(color = blue) +
+  xlab("Cat body weight (Kg)") +
+  ylab("Cat heart weight (g)")
-

+

@@ -593,8 +612,8 @@

Slightly harder example

-
cats.lm <- lm(Hwt ~ 0 + Bwt, data = fatcats)
-summary(cats.lm)
+
cats.lm <- lm(Hwt ~ 0 + Bwt, data = fatcats)
+summary(cats.lm)

 Call:
@@ -614,7 +633,7 @@ 

Slightly harder example

Multiple R-squared: 0.965, Adjusted R-squared: 0.9648 F-statistic: 3947 on 1 and 143 DF, p-value: < 2.2e-16
-
confint(cats.lm)
+
confint(cats.lm)
       2.5 %   97.5 %
 Bwt 3.829836 4.078652
@@ -628,12 +647,12 @@

When we fit models, we examine diagnostics

-
qqnorm(residuals(cats.lm), pch = 16, col = blue)
-qqline(residuals(cats.lm), col = orange, lwd = 2)
+
qqnorm(residuals(cats.lm), pch = 16, col = blue)
+qqline(residuals(cats.lm), col = orange, lwd = 2)
-

+

@@ -643,21 +662,21 @@

When we fit models, we examine diagnostics

We bootstrap

-
B <- 500
-alpha <- .05
-bhats <- map_dbl(1:B, ~ {
-  newcats <- fatcats |>
-    slice_sample(prop = 1, replace = TRUE)
-  coef(lm(Hwt ~ 0 + Bwt, data = newcats))
-})
-
-2 * coef(cats.lm) - # Bootstrap CI
-  quantile(bhats, probs = c(1 - alpha / 2, alpha / 2))
+
B <- 500
+alpha <- .05
+bhats <- map_dbl(1:B, ~ {
+  newcats <- fatcats |>
+    slice_sample(prop = 1, replace = TRUE)
+  coef(lm(Hwt ~ 0 + Bwt, data = newcats))
+})
+
+2 * coef(cats.lm) - # Bootstrap CI
+  quantile(bhats, probs = c(1 - alpha / 2, alpha / 2))
   97.5%     2.5% 
 3.826735 4.084322 
-
confint(cats.lm) # Original CI
+
confint(cats.lm) # Original CI
       2.5 %   97.5 %
 Bwt 3.829836 4.078652
@@ -688,21 +707,21 @@

Same data

Non-parametric bootstrap

Same as before

-
B <- 500
-alpha <- .05
-bhats <- map_dbl(1:B, ~ {
-  newcats <- fatcats |>
-    slice_sample(prop = 1, replace = TRUE)
-  coef(lm(Hwt ~ 0 + Bwt, data = newcats))
-})
-
-2 * coef(cats.lm) - # NP Bootstrap CI
-  quantile(bhats, probs = c(1 - alpha / 2, alpha / 2))
+
B <- 500
+alpha <- .05
+bhats <- map_dbl(1:B, ~ {
+  newcats <- fatcats |>
+    slice_sample(prop = 1, replace = TRUE)
+  coef(lm(Hwt ~ 0 + Bwt, data = newcats))
+})
+
+2 * coef(cats.lm) - # NP Bootstrap CI
+  quantile(bhats, probs = c(1 - alpha / 2, alpha / 2))
   97.5%     2.5% 
 3.832907 4.070232 
-
confint(cats.lm) # Original CI
+
confint(cats.lm) # Original CI
       2.5 %   97.5 %
 Bwt 3.829836 4.078652
@@ -717,20 +736,20 @@

Same data

  • The \(\epsilon_i\) is random \(\longrightarrow\) just resample \(\widehat{e}_i\).
  • -
    B <- 500
    -bhats <- double(B)
    -cats.lm <- lm(Hwt ~ 0 + Bwt, data = fatcats)
    -r <- residuals(cats.lm)
    -bhats <- map_dbl(1:B, ~ {
    -  newcats <- fatcats |> mutate(
    -    Hwt = predict(cats.lm) +
    -      sample(r, n(), replace = TRUE)
    -  )
    -  coef(lm(Hwt ~ 0 + Bwt, data = newcats))
    -})
    -
    -2 * coef(cats.lm) - # Parametric Bootstrap CI
    -  quantile(bhats, probs = c(1 - alpha / 2, alpha / 2))
    +
    B <- 500
    +bhats <- double(B)
    +cats.lm <- lm(Hwt ~ 0 + Bwt, data = fatcats)
    +r <- residuals(cats.lm)
    +bhats <- map_dbl(1:B, ~ {
    +  newcats <- fatcats |> mutate(
    +    Hwt = predict(cats.lm) +
    +      sample(r, n(), replace = TRUE)
    +  )
    +  coef(lm(Hwt ~ 0 + Bwt, data = newcats))
    +})
    +
    +2 * coef(cats.lm) - # Parametric Bootstrap CI
    +  quantile(bhats, probs = c(1 - alpha / 2, alpha / 2))
       97.5%     2.5% 
     3.815162 4.065045 
    @@ -775,7 +794,7 @@

    Types of intervals

    Types of intervals

    Let \(\hat{\theta}\) be our sample statistic, \(\hat{\Theta}\) be the resamples

    \[ -[\hat{\theta} - t_{\alpha/2}\hat{s},\ \hat{\theta} - t_{\alpha/2}\hat{s}] +[\hat{\theta} - t_{\alpha/2}\hat{s},\ \hat{\theta} + t_{\alpha/2}\hat{s}] \]

    where \(\hat{s} = \sqrt{\Var{\hat{\Theta}}}\)


    diff --git a/schedule/slides/18-the-bootstrap_files/figure-revealjs/unnamed-chunk-1-1.svg b/schedule/slides/18-the-bootstrap_files/figure-revealjs/unnamed-chunk-1-1.svg index efa4791..d5b0a50 100644 --- a/schedule/slides/18-the-bootstrap_files/figure-revealjs/unnamed-chunk-1-1.svg +++ b/schedule/slides/18-the-bootstrap_files/figure-revealjs/unnamed-chunk-1-1.svg @@ -3,66 +3,81 @@ - + - + - + - + - + - + - + - - + + - - + + - - + + - + + + + + + + + + + + + + - + - + - + + + + - - + + - - + + - - + + - + - - + + - + - + @@ -70,67 +85,67 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -143,176 +158,191 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - + - + + + + + + + + - - - + + - - + + - - + + - - + + - - + + - - + + + + + - - - + + - - - - + + + - - + + - - + + + + + + + + + + + + + + + + + + + + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - - - - - - - - - - - - - - - - + + + + - - - - + + + + + + + + + + + + + + - - - + - - - + + - - - - + + - - - - + + - + - + diff --git a/schedule/slides/18-the-bootstrap_files/figure-revealjs/unnamed-chunk-2-1.svg b/schedule/slides/18-the-bootstrap_files/figure-revealjs/unnamed-chunk-2-1.svg new file mode 100644 index 0000000..efa4791 --- /dev/null +++ b/schedule/slides/18-the-bootstrap_files/figure-revealjs/unnamed-chunk-2-1.svg @@ -0,0 +1,318 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + diff --git a/schedule/slides/18-the-bootstrap_files/figure-revealjs/unnamed-chunk-5-1.svg b/schedule/slides/18-the-bootstrap_files/figure-revealjs/unnamed-chunk-5-1.svg new file mode 100644 index 0000000..904d760 --- /dev/null +++ b/schedule/slides/18-the-bootstrap_files/figure-revealjs/unnamed-chunk-5-1.svg @@ -0,0 +1,929 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/schedule/slides/18-the-bootstrap_files/figure-revealjs/unnamed-chunk-7-1.svg b/schedule/slides/18-the-bootstrap_files/figure-revealjs/unnamed-chunk-7-1.svg new file mode 100644 index 0000000..566060d --- /dev/null +++ b/schedule/slides/18-the-bootstrap_files/figure-revealjs/unnamed-chunk-7-1.svg @@ -0,0 +1,458 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/schedule/slides/18-the-bootstrap_files/figure-revealjs/unnamed-chunk-9-1.svg b/schedule/slides/18-the-bootstrap_files/figure-revealjs/unnamed-chunk-9-1.svg new file mode 100644 index 0000000..c3ab81b --- /dev/null +++ b/schedule/slides/18-the-bootstrap_files/figure-revealjs/unnamed-chunk-9-1.svg @@ -0,0 +1,412 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/search.json b/search.json index 42d04c1..85a422d 100644 --- a/search.json +++ b/search.json @@ -186,7 +186,7 @@ "href": "schedule/slides/18-the-bootstrap.html#meta-lecture", "title": "UBC Stat406 2023W", "section": "18 The bootstrap", - "text": "18 The bootstrap\nStat 406\nDaniel J. McDonald\nLast modified – 11 October 2023\n\\[\n\\DeclareMathOperator*{\\argmin}{argmin}\n\\DeclareMathOperator*{\\argmax}{argmax}\n\\DeclareMathOperator*{\\minimize}{minimize}\n\\DeclareMathOperator*{\\maximize}{maximize}\n\\DeclareMathOperator*{\\find}{find}\n\\DeclareMathOperator{\\st}{subject\\,\\,to}\n\\newcommand{\\E}{E}\n\\newcommand{\\Expect}[1]{\\E\\left[ #1 \\right]}\n\\newcommand{\\Var}[1]{\\mathrm{Var}\\left[ #1 \\right]}\n\\newcommand{\\Cov}[2]{\\mathrm{Cov}\\left[#1,\\ #2\\right]}\n\\newcommand{\\given}{\\ \\vert\\ }\n\\newcommand{\\X}{\\mathbf{X}}\n\\newcommand{\\x}{\\mathbf{x}}\n\\newcommand{\\y}{\\mathbf{y}}\n\\newcommand{\\P}{\\mathcal{P}}\n\\newcommand{\\R}{\\mathbb{R}}\n\\newcommand{\\norm}[1]{\\left\\lVert #1 \\right\\rVert}\n\\newcommand{\\snorm}[1]{\\lVert #1 
\\rVert}\n\\newcommand{\\tr}[1]{\\mbox{tr}(#1)}\n\\newcommand{\\brt}{\\widehat{\\beta}^R_{s}}\n\\newcommand{\\brl}{\\widehat{\\beta}^R_{\\lambda}}\n\\newcommand{\\bls}{\\widehat{\\beta}_{ols}}\n\\newcommand{\\blt}{\\widehat{\\beta}^L_{s}}\n\\newcommand{\\bll}{\\widehat{\\beta}^L_{\\lambda}}\n\\]" + "text": "18 The bootstrap\nStat 406\nDaniel J. McDonald\nLast modified – 30 October 2023\n\\[\n\\DeclareMathOperator*{\\argmin}{argmin}\n\\DeclareMathOperator*{\\argmax}{argmax}\n\\DeclareMathOperator*{\\minimize}{minimize}\n\\DeclareMathOperator*{\\maximize}{maximize}\n\\DeclareMathOperator*{\\find}{find}\n\\DeclareMathOperator{\\st}{subject\\,\\,to}\n\\newcommand{\\E}{E}\n\\newcommand{\\Expect}[1]{\\E\\left[ #1 \\right]}\n\\newcommand{\\Var}[1]{\\mathrm{Var}\\left[ #1 \\right]}\n\\newcommand{\\Cov}[2]{\\mathrm{Cov}\\left[#1,\\ #2\\right]}\n\\newcommand{\\given}{\\ \\vert\\ }\n\\newcommand{\\X}{\\mathbf{X}}\n\\newcommand{\\x}{\\mathbf{x}}\n\\newcommand{\\y}{\\mathbf{y}}\n\\newcommand{\\P}{\\mathcal{P}}\n\\newcommand{\\R}{\\mathbb{R}}\n\\newcommand{\\norm}[1]{\\left\\lVert #1 \\right\\rVert}\n\\newcommand{\\snorm}[1]{\\lVert #1 \\rVert}\n\\newcommand{\\tr}[1]{\\mbox{tr}(#1)}\n\\newcommand{\\brt}{\\widehat{\\beta}^R_{s}}\n\\newcommand{\\brl}{\\widehat{\\beta}^R_{\\lambda}}\n\\newcommand{\\bls}{\\widehat{\\beta}_{ols}}\n\\newcommand{\\blt}{\\widehat{\\beta}^L_{s}}\n\\newcommand{\\bll}{\\widehat{\\beta}^L_{\\lambda}}\n\\]" }, { "objectID": "schedule/slides/18-the-bootstrap.html#a-small-detour", @@ -230,12 +230,19 @@ "section": "Bootstrap procedure", "text": "Bootstrap procedure\n\nResample your training data w/ replacement.\nCalculate LDA on this sample.\nProduce a new prediction, call it \\(\\widehat{Pr}_b(y_0 =1 \\given x_0)\\).\nRepeat 1-3 \\(b = 1,\\ldots,B\\) times.\nCI: \\(\\left[2\\widehat{Pr}(y_0 =1 \\given x_0) - \\widehat{F}_{boot}(1-\\alpha/2),\\ 2\\widehat{Pr}(y_0 =1 \\given x_0) - \\widehat{F}_{boot}(\\alpha/2)\\right]\\)\n\n\n\\(\\hat{F}\\) is the “empirical” 
distribution of the bootstraps." }, + { + "objectID": "schedule/slides/18-the-bootstrap.html#empirical-distribution", + "href": "schedule/slides/18-the-bootstrap.html#empirical-distribution", + "title": "UBC Stat406 2023W", + "section": "Empirical distribution", + "text": "Empirical distribution\n\n\nCode\nr <- rexp(50, 1 / 5)\nggplot(tibble(r = r), aes(r)) + \n stat_ecdf(colour = orange) +\n geom_vline(xintercept = quantile(r, probs = c(.05, .95))) +\n geom_hline(yintercept = c(.05, .95), linetype = \"dashed\") +\n annotate(\n \"label\", x = c(5, 12), y = c(.25, .75), \n label = c(\"hat(F)[boot](.05)\", \"hat(F)[boot](.95)\"), \n parse = TRUE\n )" + }, { "objectID": "schedule/slides/18-the-bootstrap.html#very-basic-example", "href": "schedule/slides/18-the-bootstrap.html#very-basic-example", "title": "UBC Stat406 2023W", "section": "Very basic example", - "text": "Very basic example\n\nLet \\(X_i\\sim Exponential(1/5)\\). The pdf is \\(f(x) = \\frac{1}{5}e^{-x/5}\\)\nI know if I estimate the mean with \\(\\bar{X}\\), then by the CLT (if \\(n\\) is big),\n\n\\[\\frac{\\sqrt{n}(\\bar{X}-E[X])}{s} \\approx N(0, 1).\\]\n\nThis gives me a 95% confidence interval like \\[\\bar{X} \\pm 2s/\\sqrt{n}\\]\nBut I don’t want to estimate the mean, I want to estimate the median." + "text": "Very basic example\n\nLet \\(X_i\\sim \\textrm{Exponential}(1/5)\\). The pdf is \\(f(x) = \\frac{1}{5}e^{-x/5}\\)\nI know if I estimate the mean with \\(\\bar{X}\\), then by the CLT (if \\(n\\) is big),\n\n\\[\\frac{\\sqrt{n}(\\bar{X}-E[X])}{s} \\approx N(0, 1).\\]\n\nThis gives me a 95% confidence interval like \\[\\bar{X} \\pm 2s/\\sqrt{n}\\]\nBut I don’t want to estimate the mean, I want to estimate the median." 
}, { "objectID": "schedule/slides/18-the-bootstrap.html#section-1", @@ -319,7 +326,7 @@ "href": "schedule/slides/18-the-bootstrap.html#types-of-intervals-1", "title": "UBC Stat406 2023W", "section": "Types of intervals", - "text": "Types of intervals\nLet \\(\\hat{\\theta}\\) be our sample statistic, \\(\\hat{\\Theta}\\) be the resamples\n\\[\n[\\hat{\\theta} - t_{\\alpha/2}\\hat{s},\\ \\hat{\\theta} - t_{\\alpha/2}\\hat{s}]\n\\]\nwhere \\(\\hat{s} = \\sqrt{\\Var{\\hat{\\Theta}}}\\)\n\n\nCalled the “Normal Interval”\nOnly works if the distribution of \\(\\hat{\\Theta}\\) is approximately Normal.\nUnlikely to work well\nDon’t do this" + "text": "Types of intervals\nLet \\(\\hat{\\theta}\\) be our sample statistic, \\(\\hat{\\Theta}\\) be the resamples\n\\[\n[\\hat{\\theta} - t_{\\alpha/2}s,\\ \\hat{\\theta} - t_{\\alpha/2}s]\n\\]\nwhere \\(\\hat{s} = \\sqrt{\\Var{\\hat{\\Theta}}}\\)\n\n\nCalled the “Normal Interval”\nOnly works if the distribution of \\(\\hat{\\Theta}\\) is approximately Normal.\nUnlikely to work well\nDon’t do this" }, { "objectID": "schedule/slides/18-the-bootstrap.html#types-of-intervals-2", @@ -3546,7 +3553,7 @@ "href": "schedule/slides/17-nonlinear-classifiers.html#meta-lecture", "title": "UBC Stat406 2023W", "section": "17 Nonlinear classifiers", - "text": "17 Nonlinear classifiers\nStat 406\nDaniel J. 
McDonald\nLast modified – 26 October 2023\n\\[\n\\DeclareMathOperator*{\\argmin}{argmin}\n\\DeclareMathOperator*{\\argmax}{argmax}\n\\DeclareMathOperator*{\\minimize}{minimize}\n\\DeclareMathOperator*{\\maximize}{maximize}\n\\DeclareMathOperator*{\\find}{find}\n\\DeclareMathOperator{\\st}{subject\\,\\,to}\n\\newcommand{\\E}{E}\n\\newcommand{\\Expect}[1]{\\E\\left[ #1 \\right]}\n\\newcommand{\\Var}[1]{\\mathrm{Var}\\left[ #1 \\right]}\n\\newcommand{\\Cov}[2]{\\mathrm{Cov}\\left[#1,\\ #2\\right]}\n\\newcommand{\\given}{\\ \\vert\\ }\n\\newcommand{\\X}{\\mathbf{X}}\n\\newcommand{\\x}{\\mathbf{x}}\n\\newcommand{\\y}{\\mathbf{y}}\n\\newcommand{\\P}{\\mathcal{P}}\n\\newcommand{\\R}{\\mathbb{R}}\n\\newcommand{\\norm}[1]{\\left\\lVert #1 \\right\\rVert}\n\\newcommand{\\snorm}[1]{\\lVert #1 \\rVert}\n\\newcommand{\\tr}[1]{\\mbox{tr}(#1)}\n\\newcommand{\\brt}{\\widehat{\\beta}^R_{s}}\n\\newcommand{\\brl}{\\widehat{\\beta}^R_{\\lambda}}\n\\newcommand{\\bls}{\\widehat{\\beta}_{ols}}\n\\newcommand{\\blt}{\\widehat{\\beta}^L_{s}}\n\\newcommand{\\bll}{\\widehat{\\beta}^L_{\\lambda}}\n\\]" + "text": "17 Nonlinear classifiers\nStat 406\nDaniel J. 
McDonald\nLast modified – 30 October 2023\n\\[\n\\DeclareMathOperator*{\\argmin}{argmin}\n\\DeclareMathOperator*{\\argmax}{argmax}\n\\DeclareMathOperator*{\\minimize}{minimize}\n\\DeclareMathOperator*{\\maximize}{maximize}\n\\DeclareMathOperator*{\\find}{find}\n\\DeclareMathOperator{\\st}{subject\\,\\,to}\n\\newcommand{\\E}{E}\n\\newcommand{\\Expect}[1]{\\E\\left[ #1 \\right]}\n\\newcommand{\\Var}[1]{\\mathrm{Var}\\left[ #1 \\right]}\n\\newcommand{\\Cov}[2]{\\mathrm{Cov}\\left[#1,\\ #2\\right]}\n\\newcommand{\\given}{\\ \\vert\\ }\n\\newcommand{\\X}{\\mathbf{X}}\n\\newcommand{\\x}{\\mathbf{x}}\n\\newcommand{\\y}{\\mathbf{y}}\n\\newcommand{\\P}{\\mathcal{P}}\n\\newcommand{\\R}{\\mathbb{R}}\n\\newcommand{\\norm}[1]{\\left\\lVert #1 \\right\\rVert}\n\\newcommand{\\snorm}[1]{\\lVert #1 \\rVert}\n\\newcommand{\\tr}[1]{\\mbox{tr}(#1)}\n\\newcommand{\\brt}{\\widehat{\\beta}^R_{s}}\n\\newcommand{\\brl}{\\widehat{\\beta}^R_{\\lambda}}\n\\newcommand{\\bls}{\\widehat{\\beta}_{ols}}\n\\newcommand{\\blt}{\\widehat{\\beta}^L_{s}}\n\\newcommand{\\bll}{\\widehat{\\beta}^L_{\\lambda}}\n\\]" }, { "objectID": "schedule/slides/17-nonlinear-classifiers.html#last-time", @@ -3639,6 +3646,13 @@ "section": "knn.cv() (leave one out)", "text": "knn.cv() (leave one out)\n\nkmax <- 20\nerr <- map_dbl(1:kmax, ~ mean(knn.cv(dat1[, -1], dat1$y, k = .x) != dat1$y))\n\n\nI would use the largest (odd) k that is close to the minimum.\nThis produces simpler, smoother, decision boundaries." 
}, + { + "objectID": "schedule/slides/17-nonlinear-classifiers.html#alternative-using-deviance-loss-i-think-this-is-right", + "href": "schedule/slides/17-nonlinear-classifiers.html#alternative-using-deviance-loss-i-think-this-is-right", + "title": "UBC Stat406 2023W", + "section": "Alternative (using deviance loss, I think this is right)", + "text": "Alternative (using deviance loss, I think this is right)\n\n\nCode\ndev <- function(y, prob, prob_min = 1e-5) {\n y <- as.numeric(as.factor(y)) - 1 # 0/1 valued\n m <- mean(y)\n prob_max <- 1 - prob_min\n prob <- pmin(pmax(prob, prob_min), prob_max)\n lp <- (1 - y) * log(1 - prob) + y * log(prob)\n ly <- (1 - y) * log(1 - m) + y * log(m)\n 2 * (ly - lp)\n}\nknn.cv_probs <- function(train, cl, k = 1) {\n o <- knn.cv(train, cl, k = k, prob = TRUE)\n p <- attr(o, \"prob\")\n o <- as.numeric(as.factor(o)) - 1\n p[o == 0] <- 1 - p[o == 0]\n p\n}\ndev_err <- map_dbl(1:kmax, ~ mean(dev(dat1$y, knn.cv_probs(dat1[, -1], dat1$y, k = .x))))" + }, { "objectID": "schedule/slides/17-nonlinear-classifiers.html#final-version", "href": "schedule/slides/17-nonlinear-classifiers.html#final-version", diff --git a/sitemap.xml b/sitemap.xml index 45346a0..bbe3039 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,166 +2,166 @@ https://github.com/UBC-STAT/stat-406/schedule/handouts/lab00-git.html - 2023-10-26T13:27:09.096Z + 2023-10-30T17:41:01.448Z https://github.com/UBC-STAT/stat-406/schedule/slides/22-nnets-estimation.html - 2023-10-26T13:27:06.196Z + 2023-10-30T17:40:59.100Z https://github.com/UBC-STAT/stat-406/schedule/slides/20-boosting.html - 2023-10-26T13:27:04.808Z + 2023-10-30T17:40:57.956Z https://github.com/UBC-STAT/stat-406/schedule/slides/18-the-bootstrap.html - 2023-10-26T13:27:02.872Z + 2023-10-30T17:40:56.436Z https://github.com/UBC-STAT/stat-406/schedule/slides/16-logistic-regression.html - 2023-10-26T13:27:00.820Z + 2023-10-30T17:40:54.748Z https://github.com/UBC-STAT/stat-406/schedule/slides/14-classification-intro.html - 
2023-10-26T13:26:59.144Z + 2023-10-30T17:40:53.376Z https://github.com/UBC-STAT/stat-406/schedule/slides/12-why-smooth.html - 2023-10-26T13:26:57.276Z + 2023-10-30T17:40:51.824Z https://github.com/UBC-STAT/stat-406/schedule/slides/10-basis-expansions.html - 2023-10-26T13:26:55.764Z + 2023-10-30T17:40:50.640Z https://github.com/UBC-STAT/stat-406/schedule/slides/08-ridge-regression.html - 2023-10-26T13:26:53.932Z + 2023-10-30T17:40:49.084Z https://github.com/UBC-STAT/stat-406/schedule/slides/06-information-criteria.html - 2023-10-26T13:26:51.908Z + 2023-10-30T17:40:47.432Z https://github.com/UBC-STAT/stat-406/schedule/slides/04-bias-variance.html - 2023-10-26T13:26:50.024Z + 2023-10-30T17:40:45.832Z https://github.com/UBC-STAT/stat-406/schedule/slides/02-lm-example.html - 2023-10-26T13:26:48.152Z + 2023-10-30T17:40:44.224Z https://github.com/UBC-STAT/stat-406/schedule/slides/00-version-control.html - 2023-10-26T13:26:46.524Z + 2023-10-30T17:40:42.920Z https://github.com/UBC-STAT/stat-406/schedule/slides/00-quiz-0-wrap.html - 2023-10-26T13:26:43.216Z + 2023-10-30T17:40:40.188Z https://github.com/UBC-STAT/stat-406/schedule/slides/00-gradient-descent.html - 2023-10-26T13:26:41.532Z + 2023-10-30T17:40:38.876Z https://github.com/UBC-STAT/stat-406/schedule/slides/00-classification-losses.html - 2023-10-26T13:26:39.768Z + 2023-10-30T17:40:36.448Z https://github.com/UBC-STAT/stat-406/course-setup.html - 2023-10-26T13:26:37.540Z + 2023-10-30T17:40:33.627Z https://github.com/UBC-STAT/stat-406/computing/windows.html - 2023-10-26T13:26:35.148Z + 2023-10-30T17:40:31.655Z https://github.com/UBC-STAT/stat-406/computing/mac_x86.html - 2023-10-26T13:26:33.136Z + 2023-10-30T17:40:29.975Z https://github.com/UBC-STAT/stat-406/computing/index.html - 2023-10-26T13:26:31.268Z + 2023-10-30T17:40:28.383Z https://github.com/UBC-STAT/stat-406/index.html - 2023-10-26T13:26:29.064Z + 2023-10-30T17:40:26.495Z https://github.com/UBC-STAT/stat-406/faq.html - 2023-10-26T13:26:30.768Z + 
2023-10-30T17:40:27.951Z https://github.com/UBC-STAT/stat-406/computing/mac_arm.html - 2023-10-26T13:26:32.252Z + 2023-10-30T17:40:29.183Z https://github.com/UBC-STAT/stat-406/computing/ubuntu.html - 2023-10-26T13:26:34.068Z + 2023-10-30T17:40:30.731Z https://github.com/UBC-STAT/stat-406/syllabus.html - 2023-10-26T13:26:36.716Z + 2023-10-30T17:40:32.963Z https://github.com/UBC-STAT/stat-406/schedule/index.html - 2023-10-26T13:26:38.776Z + 2023-10-30T17:40:34.655Z https://github.com/UBC-STAT/stat-406/schedule/slides/00-cv-for-many-models.html - 2023-10-26T13:26:40.520Z + 2023-10-30T17:40:38.072Z https://github.com/UBC-STAT/stat-406/schedule/slides/00-intro-to-class.html - 2023-10-26T13:26:42.448Z + 2023-10-30T17:40:39.568Z https://github.com/UBC-STAT/stat-406/schedule/slides/00-r-review.html - 2023-10-26T13:26:45.136Z + 2023-10-30T17:40:41.760Z https://github.com/UBC-STAT/stat-406/schedule/slides/01-lm-review.html - 2023-10-26T13:26:47.340Z + 2023-10-30T17:40:43.572Z https://github.com/UBC-STAT/stat-406/schedule/slides/03-regression-function.html - 2023-10-26T13:26:49.192Z + 2023-10-30T17:40:45.120Z https://github.com/UBC-STAT/stat-406/schedule/slides/05-estimating-test-mse.html - 2023-10-26T13:26:51.040Z + 2023-10-30T17:40:46.704Z https://github.com/UBC-STAT/stat-406/schedule/slides/07-greedy-selection.html - 2023-10-26T13:26:52.864Z + 2023-10-30T17:40:48.224Z https://github.com/UBC-STAT/stat-406/schedule/slides/09-l1-penalties.html - 2023-10-26T13:26:54.924Z + 2023-10-30T17:40:49.968Z https://github.com/UBC-STAT/stat-406/schedule/slides/11-kernel-smoothers.html - 2023-10-26T13:26:56.700Z + 2023-10-30T17:40:51.356Z https://github.com/UBC-STAT/stat-406/schedule/slides/13-gams-trees.html - 2023-10-26T13:26:58.084Z + 2023-10-30T17:40:52.560Z https://github.com/UBC-STAT/stat-406/schedule/slides/15-LDA-and-QDA.html - 2023-10-26T13:27:00.000Z + 2023-10-30T17:40:54.068Z https://github.com/UBC-STAT/stat-406/schedule/slides/17-nonlinear-classifiers.html - 
2023-10-26T13:27:01.840Z + 2023-10-30T17:40:55.592Z https://github.com/UBC-STAT/stat-406/schedule/slides/19-bagging-and-rf.html - 2023-10-26T13:27:03.840Z + 2023-10-30T17:40:57.212Z https://github.com/UBC-STAT/stat-406/schedule/slides/21-nnets-intro.html - 2023-10-26T13:27:05.416Z + 2023-10-30T17:40:58.488Z https://github.com/UBC-STAT/stat-406/schedule/slides/23-nnets-other.html - 2023-10-26T13:27:07.276Z + 2023-10-30T17:41:00.000Z