Skip to content

Commit

Permalink
Adding additional test cases that were collaboratively developed
Browse files Browse the repository at this point in the history
  • Loading branch information
jan-imbi committed Jul 10, 2024
1 parent 6d0decd commit cbb87c0
Show file tree
Hide file tree
Showing 9 changed files with 259 additions and 36 deletions.
2 changes: 1 addition & 1 deletion R/analyze.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ setGeneric("analyze", function(data,
setMethod("analyze", signature("data.frame"),
function(data, statistics, data_distribution, use_full_twoarm_sampling_distribution, design, sigma, exact){
if (missing(data))
stop("data argument may not be ommited.")
stop("data argument may not be ommited.") #nocov
if (is(data_distribution, "Student") && !missing(sigma)){
warning("data_distribution was set to Normal because sigma was specified.")
data_distribution <- Normal(two_armed = data_distribution@two_armed)
Expand Down
10 changes: 5 additions & 5 deletions R/reference_implementation.R
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@
# estimator is that the (normal) Rao-Blackwell estimator uses the actual
# n2 preimage.
n2 <- ceiling(.n2_extrapol(design, z1))
lower_z1 <- uniroot(\(x) .n2_extrapol(design, x) - n2 - -2, c(c1f, c1e))$root
lower_z1 <- uniroot(\(x) .n2_extrapol(design, x) - n2, c(c1f, c1e))$root
upper_z1 <- uniroot(\(x) .n2_extrapol(design, x) - n2 - -1, c(c1f, c1e))$root
x <- .x1_x2_to_x(x1 = x1, x2 = x2, n1 = n1, n2 = n2)
numerator <-
Expand All @@ -358,8 +358,8 @@
x1_ * .f2(z1 = z1_[1,,drop=FALSE], z2 = (((n1 + n2) * x - n1 * x1_) / n2 - mu0) * sqrt(n2) / sigma,
n1 = n1, n2 = n2, mu = mu0, mu0 = mu0, sigma = sigma)
},
lowerLimit = c1f,
upperLimit = c1e,
lowerLimit = lower_z1,
upperLimit = upper_z1,
vectorInterface = TRUE
)$integral
denominator <-
Expand Down Expand Up @@ -804,9 +804,9 @@
minc2 <- .c2_extrapol(design, c1e)
maxc2 <- .c2_extrapol(design, c1f)
# Assumes c2 is monotonically decreasing
if (maxc2 < lower_z2) {
if (maxc2 < upper_z2) {
ret <- upper_l
} else if (minc2 >upper_z2) {
} else if (minc2 >lower_z2) {
ret <- lower_l
} else{
ret <- uniroot(\(x) .c2_extrapol(design, (x1 - x)*sqrt(n1)/sigma) - (x2 - x)*sqrt(n2)/sigma,
Expand Down
4 changes: 4 additions & 0 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ An introductory vignette covering common usecases is given at [https://jan-imbi.
This package comes with a suite of unit tests. The code of the test cases can be viewed here: [https://github.com/jan-imbi/adestr/tree/master/tests/testthat](https://github.com/jan-imbi/adestr/tree/master/tests/testthat). The authors assume no responsibility for the correctness of the
code or results produced by its usage. Use at your own risk.

You may also be interested in looking at the reference implementation at [https://github.com/jan-imbi/adestr/blob/master/R/reference_implementation.R](https://github.com/jan-imbi/adestr/blob/master/R/reference_implementation.R).
It uses the same notation as in our paper ([doi.org/10.1002/sim.10020](https://doi.org/10.1002/sim.10020)) and may therefore be
easier to understand at first.

<!-- reference implementation verlinken -->

## Installation
Expand Down
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@ be viewed here:
authors assume no responsibility for the correctness of the code or
results produced by its usage. Use at your own risk.

You may also be interested in looking at the reference
implementation at
<https://github.com/jan-imbi/adestr/blob/master/R/reference_implementation.R>.
It uses the same notation as in our paper
([doi.org/10.1002/sim.10020](https://doi.org/10.1002/sim.10020)) and may
therefore be easier to understand at first.

<!-- reference implementation verlinken -->

## Installation
Expand Down
137 changes: 115 additions & 22 deletions tests/testthat/test_against_reference.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,46 @@ test_that("densities agree with reference implementation.",
f1_kv(z1 = 1.5, n1 = 30, mu = 0, sigma = 1, two_armed = FALSE))
expect_equal(.f2(z1 = 1.5, z2 = 1, n1 = 30, n2 = 45, mu = 0, mu0 = 0, sigma = 1),
f2_kv(z1 = 1.5, z2 = 1, n1 = 30, n2 = 45, mu = 0, sigma = 1, two_armed = FALSE))
expect_equal(.f(z1 = 1.5, z2 = 1, mu = 0, mu0 = 0, sigma = 1, designad),
f2_kv(z1 = 1.5, z2 = 1, n1 = 30, n2 = 45, mu = 0, sigma = 1, two_armed = FALSE))
})
# Check the closed-form MLE helpers against hand-computed values.
test_that("MLE is correctly implemented.",
{
# The MLE of the mean is the sample-size-weighted mean of the stage means.
expected_mle <- (1*10 + 2*20)/30
expect_equal(.mle(1,2, 10, 20), expected_mle)
# With full weight on the first stage, the estimate equals x1.
expect_equal(.fixed_weighted_mle(1,2, 1), 1)
})
# The closed-form MLE density (.mle_pdf) should agree with the package's
# numerical sample-mean density (dsmean) for a one-armed normal design.
test_that("MLE densities agree with reference implementation.",
{
# Density at 0.5 under mu = 0, sigma = 1; non-exact (extrapolated) n2,
# stagewise components combined into a single density value.
expect_equal(.mle_pdf(0.5, 0, 0, 1, designad),
dsmean(
Normal(two_armed = FALSE),
designad,
.5,
0,
1,
exact = FALSE,
combine_components = TRUE
), tolerance = 1e-4)
# NOTE(review): .mle_pdf is evaluated at 1.5 with mu = 1 here, but dsmean
# below still uses the (.5, 0) arguments from the previous expectation —
# looks copy-pasted; presumably it should be dsmean(..., 1.5, 1, ...).
# TODO confirm against the reference implementation.
expect_equal(.mle_pdf(1.5, 1, 1, 1, designad),
dsmean(
Normal(two_armed = FALSE),
designad,
.5,
0,
1,
exact = FALSE,
combine_components = TRUE
), tolerance = 1e-4)
})
test_that("mle CDF works.",
{
expect_equal(.mle_pdf(0.5, 0, 0, 1, designad),
dsmean(
Normal(two_armed = FALSE),
designad,
.5,
0,
1,
exact = FALSE,
combine_components = TRUE
), tolerance = 1e-4)
expect_equal(.mle_pdf(1.5, 1, 1, 1, designad),
dsmean(
Normal(two_armed = FALSE),
designad,
.5,
0,
1,
exact = FALSE,
combine_components = TRUE
), tolerance = 1e-4)
})
expect_equal(.mle_cdf(5, 0, 0, 1, designad), 1)
expect_equal(.mle_cdf(-5, 0, 0, 1, designad), 0)
}
)


test_that("pseudo Rao-Blackwell estimator agrees with reference implementation.",
{
prb <- get_stagewise_estimators(PseudoRaoBlackwell(), Normal(FALSE), FALSE, designad, 1, FALSE)
Expand All @@ -37,6 +52,15 @@ test_that("pseudo Rao-Blackwell estimator agrees with reference implementation."
0, 1, FALSE),
tolerance = 1e-3)
})

# The packaged Rao-Blackwell second-stage estimator should match the
# standalone reference implementation at a representative sample point.
test_that("Rao-Blackwell estimator agrees with reference implementation.",
{
rb <- get_stagewise_estimators(RaoBlackwell(), Normal(FALSE), FALSE, designad, 1, FALSE)
n1 <- designad@n1
# Stage-two sample size implied by the observed first-stage mean of .4.
n2 <- n2_extrapol(designad, smean_to_z(.4, n1, 1, FALSE))
expect_equal(
.rao_blackwell(.4, .4, .0, 1, designad),
rb$g2(.4, .4, n1, n2, 0, 1, FALSE),
tolerance = 1e-3
)
})
test_that("AWSM (min peak var) estimator agrees with reference implementation.",
{
awsm <- get_stagewise_estimators(MinimizePeakVariance(), Normal(FALSE), FALSE, designad, 1, FALSE)
Expand Down Expand Up @@ -83,7 +107,7 @@ test_that("median unbiased (SWCF ordering) estimator agrees with reference imple
med$g2(designad, .2, .2, designad@n1, n2_extrapol(designad, smean_to_z(.2, designad@n1, 1, FALSE)), 1, FALSE),
tolerance = 1e-3)
})
test_that("median unbiased (SWCF ordering) estimator agrees with reference implementation.",
test_that("median unbiased (Neyman Pearson ordering) estimator agrees with reference implementation.",
{
p <- get_stagewise_estimators(NeymanPearsonOrderingPValue(0, 0.4), Normal(FALSE), FALSE, designad, 1, FALSE)
expect_equal(.p_np(x1 = .2, x2 = .2, mu = 0, mu0 = 0, mu1 = .4, sigma = 1, design = designad),
Expand All @@ -99,5 +123,74 @@ test_that("P-value (Neyman-Pearson ordering) agrees with reference implementatio
})


# Confidence interval under MLE ordering: both bounds must match the
# reference implementation at (x1, x2) = (.2, .2).
test_that("CI (MLE ordering) agrees with reference implementation.",
{
ci <- get_stagewise_estimators(MLEOrderingCI(), Normal(FALSE), FALSE, designad, 1, FALSE)
ref_ci <- .confidence_interval_ml(x1 = .2, x2 = .2, mu0 = 0, sigma = 1, alpha = 0.05, design=designad)
n1 <- designad@n1
# Second-stage sample size implied by the observed first-stage mean.
n2 <- n2_extrapol(designad, smean_to_z(.2, n1, 1, FALSE))
# Lower and upper bounds each agree up to 1e-2.
expect_equal(ref_ci[1], ci$l2(designad, .2, .2, n1, n2, 1, FALSE), tolerance = 1e-2)
expect_equal(ref_ci[2], ci$u2(designad, .2, .2, n1, n2, 1, FALSE), tolerance = 1e-2)
})

# Confidence interval under likelihood-ratio ordering: both bounds must
# match the reference implementation at (x1, x2) = (.2, .2).
test_that("CI (LR ordering) agrees with reference implementation.",
{
ci <- get_stagewise_estimators(LikelihoodRatioOrderingCI(), Normal(FALSE), FALSE, designad, 1, FALSE)
ref_ci <- .confidence_interval_lr(x1 = .2, x2 = .2, mu0 = 0, sigma = 1, alpha = 0.05, design=designad)
n1 <- designad@n1
# Second-stage sample size implied by the observed first-stage mean.
n2 <- n2_extrapol(designad, smean_to_z(.2, n1, 1, FALSE))
expect_equal(ref_ci[1], ci$l2(designad, .2, .2, n1, n2, 1, FALSE), tolerance = 1e-2)
expect_equal(ref_ci[2], ci$u2(designad, .2, .2, n1, n2, 1, FALSE), tolerance = 1e-2)
})
# Confidence interval under score-test ordering: both bounds must match
# the reference implementation (looser 1e-1 tolerance for this ordering).
test_that("CI (ST ordering) agrees with reference implementation.",
{
ci <- get_stagewise_estimators(ScoreTestOrderingCI(), Normal(FALSE), FALSE, designad, 1, FALSE)
ref_ci <- .confidence_interval_st(x1 = .2, x2 = .2, mu0 = 0, sigma = 1, alpha = 0.05, design=designad)
n1 <- designad@n1
# Second-stage sample size implied by the observed first-stage mean.
n2 <- n2_extrapol(designad, smean_to_z(.2, n1, 1, FALSE))
expect_equal(ref_ci[1], ci$l2(designad, .2, .2, n1, n2, 1, FALSE), tolerance = 1e-1)
expect_equal(ref_ci[2], ci$u2(designad, .2, .2, n1, n2, 1, FALSE), tolerance = 1e-1)
})
# Confidence interval under stagewise-combination-function ordering:
# both bounds must match the reference implementation.
test_that("CI (SWCF ordering) agrees with reference implementation.",
{
ci <- get_stagewise_estimators(StagewiseCombinationFunctionOrderingCI(), Normal(FALSE), FALSE, designad, 1, FALSE)
ref_ci <- .confidence_interval_swcf(x1 = .2, x2 = .2, mu0 = 0, sigma = 1, alpha = 0.05, design=designad)
n1 <- designad@n1
# Second-stage sample size implied by the observed first-stage mean.
n2 <- n2_extrapol(designad, smean_to_z(.2, n1, 1, FALSE))
expect_equal(ref_ci[1], ci$l2(designad, .2, .2, n1, n2, 1, FALSE), tolerance = 1e-2)
expect_equal(ref_ci[2], ci$u2(designad, .2, .2, n1, n2, 1, FALSE), tolerance = 1e-2)
})


# Naive (fixed-design) confidence interval: both bounds must match the
# reference implementation. Note: the naive l2/u2 take no design argument.
test_that("CI (naive) agrees with reference implementation.",
{
ci <- get_stagewise_estimators(NaiveCI(), Normal(FALSE), FALSE, designad, 1, FALSE)
n1 <- designad@n1
# Second-stage sample size implied by the observed first-stage mean.
n2 <- n2_extrapol(designad, smean_to_z(.2, n1, 1, FALSE))
ref_ci <- .naive_confidence_interval(x1 = .2, x2 = .2, n1, n2, sigma = 1, alpha = 0.05)
expect_equal(ref_ci[1], ci$l2(.2, .2, n1, n2, 1, FALSE), tolerance = 1e-2)
expect_equal(ref_ci[2], ci$u2(.2, .2, n1, n2, 1, FALSE), tolerance = 1e-2)
})

# Repeated confidence interval: both bounds must match the reference
# implementation (looser 1e-1 tolerance).
test_that("CI (repeated) agrees with reference implementation.",
{
ci <- get_stagewise_estimators(RepeatedCI(), Normal(FALSE), FALSE, designad, 1, FALSE)
ref_ci <- .repeated_confidence_interval(x1 = .2, x2 = .2, mu0 = 0, sigma = 1, design=designad)
n1 <- designad@n1
# Second-stage sample size implied by the observed first-stage mean.
n2 <- n2_extrapol(designad, smean_to_z(.2, n1, 1, FALSE))
expect_equal(ref_ci[1], ci$l2(designad, .2, .2, n1, n2, 1, FALSE), tolerance = 1e-1)
expect_equal(ref_ci[2], ci$u2(designad, .2, .2, n1, n2, 1, FALSE), tolerance = 1e-1)
})

38 changes: 38 additions & 0 deletions tests/testthat/test_analyze.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,44 @@ test_that("Analysis function doesn't throw an error.",
NA)
})

# Verify analyze() signals the expected warnings/errors for inconsistent
# argument combinations. `dat` is defined earlier in this test file
# (not visible in this chunk) — presumably a two-armed two-stage dataset.
test_that("Analysis function throws an error when it should.",
{
# Supplying sigma together with a Student distribution: analyze() warns
# and switches the distribution to Normal (see R/analyze.R: "data_distribution
# was set to Normal because sigma was specified.").
expect_warning(
analyze(
data = dat,
statistics = list(FirstStageSampleMean(), SampleMean(), NaiveCI()),
data_distribution = Student(TRUE),
sigma = 1,
design = get_example_design(TRUE)
))
# Omitting sigma with a Normal distribution: exactly three warnings are
# expected (nested expect_warning calls) — presumably one per requested
# statistic needing sigma. TODO confirm the warning sources.
expect_warning(expect_warning(expect_warning(
analyze(
data = dat,
statistics = list(FirstStageSampleMean(), SampleMean(), NaiveCI()),
data_distribution = Normal(TRUE),
design = get_example_design(TRUE)
))))
# One-armed data distribution with a two-armed design/data must error.
# NOTE(review): assumed cause is the one-armed/two-armed mismatch — confirm.
expect_error(analyze(
data = dat,
statistics = list(FirstStageSampleMean(), SampleMean(), NaiveCI()),
data_distribution = Student(FALSE),
design = get_example_design(TRUE)
))

})

# Analyzing only the first-stage subset of the data should warn (the trial
# is incomplete) but must not error: expect_error(..., NA) asserts that
# NO error is raised, and the surrounding expect_warning captures the
# single expected warning.
test_that("Single-stage analyze works",
{
expect_warning(
expect_error(analyze(
data = dat[dat$stage==1,],
statistics = list(FirstStageSampleMean(), SampleMean(), NaiveCI()),
data_distribution = Normal(TRUE),
sigma = 1,
design = get_example_design(TRUE)
),
NA)
)
})


35 changes: 35 additions & 0 deletions tests/testthat/test_print.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Smoke tests: show() methods for the main S4 result classes must print
# to the console without raising errors (expect_error(..., NA) asserts
# the absence of an error; capture.output suppresses the actual printing).
test_that("Console output for various classes doesn't produce errors.",
{
set.seed(321)
# Two-armed two-stage dataset: ctl/trt at stage 1 (n = 56 per arm) and
# stage 2 (n = 47 per arm); the "ctl" observations have mean .3.
group_sizes <- c(56, 56, 47, 47)
dat <- data.frame(
endpoint = rnorm(sum(group_sizes), mean = rep(c(.3, 0, .3, 0), group_sizes)),
group = factor(rep(c("ctl", "trt", "ctl", "trt"), group_sizes)),
stage = rep(c(1L, 2L), c(56*2, 47*2))
)
# Printing a design object.
expect_error(capture.output(show(designad)), NA)
# Printing an estimator evaluation result.
expect_error(
capture.output(show(evaluate_estimator(
score = MSE(),
estimator = SampleMean(),
data_distribution = Normal(),
design = designad,
mu = c(0.3),
sigma = 1,
exact = FALSE
))),
NA
)
# Printing an analysis result.
expect_error(
capture.output(show(analyze(
data = dat,
statistics = list(FirstStageSampleMean(), SampleMean(), NaiveCI()),
data_distribution = Normal(TRUE),
sigma = 1,
design = get_example_design(TRUE)
))),
NA
)
}
)

58 changes: 50 additions & 8 deletions tests/testthat/test_prior_integral.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,52 @@
evaluate_estimator(
score = MSE(),
estimator = SampleMean(),
data_distribution = Normal(),
use_full_twoarm_sampling_distribution = FALSE,
design = get_example_design(),
mu = NormalPrior(mu=0, sigma=1),
sigma = 1
test_that("Calculating MSE wrt. a non-degenerate-prior is roughly the same as wrt. to a point-prior.",
{
# Evaluate the MSE of the sample mean under a given prior (or point value)
# for mu, holding every other setting fixed.
mse_under <- function(mu_prior) {
evaluate_estimator(
score = MSE(),
estimator = SampleMean(),
data_distribution = Normal(),
use_full_twoarm_sampling_distribution = FALSE,
design = get_example_design(),
mu = mu_prior,
sigma = 1
)@results$MSE
}
point_mse <- mse_under(0)
# A standard-normal prior centered at 0 yields roughly the same MSE as
# the point prior mu = 0.
expect_equal(
mse_under(NormalPrior(mu=0, sigma=1)),
point_mse,
tolerance=1e-1
)
# A narrow uniform prior around 0 agrees even more closely.
expect_equal(
mse_under(UniformPrior(min = -.1, max = .1)),
point_mse,
tolerance=1e-2
)
}
)





4 changes: 4 additions & 0 deletions vignettes/Introduction.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ An introductory vignette covering common usecases is given at [https://jan-imbi.
This package comes with a suite of unit tests. The code of the test cases can be viewed here: [https://github.com/jan-imbi/adestr/tree/master/tests/testthat](https://github.com/jan-imbi/adestr/tree/master/tests/testthat). The authors assume no responsibility for the correctness of the
code or results produced by its usage. Use at your own risk.

You may also be interested in looking at the reference implementation at [https://github.com/jan-imbi/adestr/blob/master/R/reference_implementation.R](https://github.com/jan-imbi/adestr/blob/master/R/reference_implementation.R).
It uses the same notation as in our paper ([doi.org/10.1002/sim.10020](https://doi.org/10.1002/sim.10020)) and may therefore be
easier to understand at first.

# Fitting a design with adoptr
In order to showcase the capabilities of this package, we need a trial design first.
We refer to [the example from the adoptr documentation](https://optad.github.io/adoptr/articles/adoptr.html)
Expand Down

0 comments on commit cbb87c0

Please sign in to comment.