Skip to content

Commit

Permalink
For #175: Add tests and fixes for twophase(method="simple").
Browse files Browse the repository at this point in the history
  • Loading branch information
bschneidr committed Sep 26, 2024
1 parent 5b36c38 commit 9a7ba75
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 28 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ S3method(select_,tbl_svy)
S3method(semi_join,tbl_svy)
S3method(subset_svy_vars,survey.design2)
S3method(subset_svy_vars,svyrep.design)
S3method(subset_svy_vars,twophase)
S3method(subset_svy_vars,twophase2)
S3method(summarise,grouped_svy)
S3method(summarise,tbl_svy)
Expand Down
29 changes: 29 additions & 0 deletions R/subset_svy_vars.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,35 @@ subset_svy_vars.twophase2 <- function(x, ..., .preserve = FALSE) {
x
}

# Adapted from survey:::"[.twophase"
#' @export
subset_svy_vars.twophase <- function(x, ..., .preserve = FALSE) {
dots <- rlang::quos(...)
filtered <- filtered_row_numbers(x, !!!dots, .preserve = .preserve)
filtered_vars <- filtered[["filtered_vars"]]
row_numbers <- filtered[["row_numbers"]]

## Set weights to zero: don't try to save memory
## Will always have numeric because of srvyr's structure
if (length(row_numbers) == 0) {
x$prob <- rep(Inf, length(x$prob))
x$phase2$prob <- rep(Inf, length(x$phase2$prob))

} else {
x$prob[-row_numbers] <- Inf
x$phase2$prob[-row_numbers] <- Inf
}

index <- is.finite(x$prob)
psu <- !duplicated(x$phase2$cluster[index, 1])
tt <- table(x$phase2$strata[index, 1][psu])
if(any(tt == 1)){
warning(sum(tt == 1), " strata have only one PSU in this subset.")
}

x
}


filtered_row_numbers <- function(.svy, ..., .preserve = FALSE) {
dots <- rlang::quos(...)
Expand Down
2 changes: 1 addition & 1 deletion R/survey_statistics_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ set_survey_vars <- function(
}
}

if (inherits(.svy, "twophase2")) {
if (inherits(.svy, c("twophase2", "twophase"))) {
if (!add) {
out$phase1$sample$variables <- select(out$phase1$sample$variables, dplyr::one_of(group_vars(out)))
}
Expand Down
2 changes: 1 addition & 1 deletion R/tbl-svy.r
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ as_tbl_svy <- function(x, var_names = list()) {
class(x) <- dplyr::setdiff(x_classes, db_svy_classes)
}

if (inherits(x, "twophase2")) {
if (inherits(x, c("twophase2", "twophase"))) {
# Convert to tbls if not already (expect them to be one of data.frame or tbl_df)
# data.frames will be converted, others should inherit "tbl".
if (!inherits(x$phase1$full$variables, "tbl")) {
Expand Down
120 changes: 94 additions & 26 deletions tests/testthat/test_as_survey_twophase.r
Original file line number Diff line number Diff line change
Expand Up @@ -12,50 +12,93 @@ pbc <- pbc %>%
mutate(randomized = !is.na(trt) & trt > 0,
id = row_number())

d2pbc_survey <- twophase(id=list(~id,~id), data=pbc, subset=~randomized)
d2fpbc_survey <- twophase(id=list(~id,~id), data=pbc, subset=~randomized,
method = "full")
d2spbc_survey <- twophase(id=list(~id,~id), data=pbc, subset=~randomized,
method = "simple")

d2pbc_srvyr <- pbc %>%
as_survey_twophase(id = list(id, id), subset = randomized)
d2fpbc_srvyr <- pbc %>%
as_survey_twophase(id = list(id, id), subset = randomized,
method = "full")

survey_results <- list(
as.data.frame(svymean(~bili, d2pbc_survey)),
as.data.frame(svytotal(~bili, d2pbc_survey)),
svyquantile(~bili, d2pbc_survey, quantiles = 0.5, ci = TRUE, df = NULL) %>%
d2spbc_srvyr <- pbc %>%
as_survey_twophase(id = list(id, id), subset = randomized,
method = "simple")

survey_full_results <- list(
as.data.frame(svymean(~bili, d2fpbc_survey)),
as.data.frame(svytotal(~bili, d2fpbc_survey)),
svyquantile(~bili, d2fpbc_survey, quantiles = 0.5, ci = TRUE, df = NULL) %>%
{cbind(as.data.frame(.[[1]][,'quantile']),
as.data.frame(SE(.)))},
as.data.frame(svyvar(~bili, d2pbc_survey)),
data.frame(albumin = svyratio(~bili, ~albumin, d2pbc_survey)[[1]],
se = sqrt(unname(svyratio(~bili, ~albumin, d2pbc_survey)[[2]])))
as.data.frame(svyvar(~bili, d2fpbc_survey)),
data.frame(albumin = svyratio(~bili, ~albumin, d2fpbc_survey)[[1]],
se = sqrt(unname(svyratio(~bili, ~albumin, d2fpbc_survey)[[2]])))
) %>% dplyr::bind_cols() %>%
setNames(c("mean", "mean_se", "total", "total_se", "median", "median_se",
"var", "var_se", "ratio", "ratio_se")) %>%
tibble::as_tibble()

srvyr_results <- d2pbc_srvyr %>%
survey_simple_results <- list(
as.data.frame(svymean(~bili, d2spbc_survey)),
as.data.frame(svytotal(~bili, d2spbc_survey)),
svyquantile(~bili, d2spbc_survey, quantiles = 0.5, ci = TRUE, df = NULL) %>%
{cbind(as.data.frame(.[[1]][,'quantile']),
as.data.frame(SE(.)))},
as.data.frame(svyvar(~bili, d2spbc_survey)),
data.frame(albumin = svyratio(~bili, ~albumin, d2spbc_survey)[[1]],
se = sqrt(unname(svyratio(~bili, ~albumin, d2spbc_survey)[[2]])))
) %>% dplyr::bind_cols() %>%
setNames(c("mean", "mean_se", "total", "total_se", "median", "median_se",
"var", "var_se", "ratio", "ratio_se")) %>%
tibble::as_tibble()

srvyr_full_results <- d2fpbc_srvyr %>%
summarize(mean = survey_mean(bili),
total = survey_total(bili),
median = survey_median(bili, df = NULL),
var = survey_var(bili),
ratio = survey_ratio(bili, albumin))

test_that("as_survey_twophase gets same mean / total / median / var / ratio in srvyr",
expect_df_equal(survey_results, srvyr_results))
srvyr_simple_results <- d2spbc_srvyr %>%
summarize(mean = survey_mean(bili),
total = survey_total(bili),
median = survey_median(bili, df = NULL),
var = survey_var(bili),
ratio = survey_ratio(bili, albumin))

test_that("as_survey_twophase(method = 'full') gets same mean / total / median / var / ratio in srvyr",
expect_df_equal(survey_full_results, srvyr_full_results))

test_that("as_survey_twophase(method = 'simple') gets same mean / total / median / var / ratio in srvyr",
expect_df_equal(survey_simple_results, srvyr_simple_results))

srvyr_full_results_vartypeNULL <- d2fpbc_srvyr %>%
summarize(mean = survey_mean(bili, vartype = NULL),
total = survey_total(bili, vartype = NULL),
median = survey_median(bili, vartype = NULL),
var = survey_var(bili, vartype = NULL),
ratio = survey_ratio(bili, albumin, vartype = NULL))

srvyr_results_vartypeNULL <- d2pbc_srvyr %>%
srvyr_simple_results_vartypeNULL <- d2spbc_srvyr %>%
summarize(mean = survey_mean(bili, vartype = NULL),
total = survey_total(bili, vartype = NULL),
median = survey_median(bili, vartype = NULL),
var = survey_var(bili, vartype = NULL),
ratio = survey_ratio(bili, albumin, vartype = NULL))

test_that("as_survey_twophase gets same mean / total / median / var / ratio in srvyr - with vartype = NULL",
expect_df_equal(select(survey_results, -ends_with("_se")),
srvyr_results_vartypeNULL))
test_that("as_survey_twophase(method = 'full') gets same mean / total / median / var / ratio in srvyr - with vartype = NULL",
expect_df_equal(select(survey_full_results, -ends_with("_se")),
srvyr_full_results_vartypeNULL))

test_that("as_survey_twophase(method = 'simple') gets same mean / total / median / var / ratio in srvyr - with vartype = NULL",
expect_df_equal(select(survey_full_results, -ends_with("_se")),
srvyr_full_results_vartypeNULL))

survey_results <- list(
as.data.frame(svyby(~bili, ~sex, d2pbc_survey, svymean)),
as.data.frame(svyby(~bili, ~sex, d2pbc_survey, svytotal)),
as.data.frame(suppressWarnings(svyby(~bili, ~sex, d2pbc_survey, svyquantile,
survey_full_results <- list(
as.data.frame(svyby(~bili, ~sex, d2fpbc_survey, svymean)),
as.data.frame(svyby(~bili, ~sex, d2fpbc_survey, svytotal)),
as.data.frame(suppressWarnings(svyby(~bili, ~sex, d2fpbc_survey, svyquantile,
quantiles = 0.5, ci = TRUE, df = NULL)))
) %>%
as.data.frame() %>%
Expand All @@ -65,15 +108,40 @@ survey_results <- list(
select(-sex2, -sex3) %>%
tibble::as_tibble()

attr(survey_results, "svyby") <- NULL
attr(survey_results, "call") <- NULL
survey_simple_results <- list(
as.data.frame(svyby(~bili, ~sex, d2spbc_survey, svymean)),
as.data.frame(svyby(~bili, ~sex, d2spbc_survey, svytotal)),
as.data.frame(suppressWarnings(svyby(~bili, ~sex, d2spbc_survey, svyquantile,
quantiles = 0.5, ci = TRUE, df = NULL)))
) %>%
as.data.frame() %>%
dplyr::bind_cols() %>%
setNames(c("sex", "mean", "mean_se", "sex2", "total", "total_se",
"sex3", "median", "median_se")) %>%
select(-sex2, -sex3) %>%
tibble::as_tibble()

suppressWarnings(srvyr_results <- d2pbc_srvyr %>%
attr(survey_full_results, "svyby") <- NULL
attr(survey_full_results, "call") <- NULL
attr(survey_simple_results, "svyby") <- NULL
attr(survey_simple_results, "call") <- NULL

suppressWarnings(srvyr_full_results <- d2fpbc_srvyr %>%
group_by(sex) %>%
summarize(mean = survey_mean(bili),
total = survey_total(bili),
median = survey_median(bili, vartype = "se", df = NULL)))

suppressWarnings(srvyr_simple_results <- d2spbc_srvyr %>%
group_by(sex) %>%
summarize(mean = survey_mean(bili),
total = survey_total(bili),
median = survey_median(bili, vartype = "se", df = NULL)))

test_that(
"as_survey_twophase(method='full') gets same mean / total / median / ratio in (grouped)",
expect_df_equal(survey_full_results, srvyr_full_results))

test_that(
"as_survey_twophase gets same mean / total / median / ratio in (grouped)",
expect_df_equal(survey_results, srvyr_results))
"as_survey_twophase(method='simple') gets same mean / total / median / ratio in (grouped)",
expect_df_equal(survey_simple_results, srvyr_simple_results))

0 comments on commit 9a7ba75

Please sign in to comment.