Skip to content

Commit

Permalink
Bug/tidyselect not working (#85)
Browse files Browse the repository at this point in the history
* Allow using tidyselect helpers with `col_select`

* Add some tests for tidyselect helpers

* Update documentation

* recid and partnership filter
Allow filtering by recid and partnership even when those columns are not included in `col_select`.

* Style package

* update tests

---------

Co-authored-by: Moohan <Moohan@users.noreply.github.com>
Co-authored-by: Zihao Li <lizihao_anu@outlook.com>
Co-authored-by: lizihao-anu <lizihao-anu@users.noreply.github.com>
  • Loading branch information
4 people authored Aug 8, 2024
1 parent a69a61c commit 0cb714f
Show file tree
Hide file tree
Showing 9 changed files with 112 additions and 28 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,4 @@ Language: en-GB
LazyData: true
Roxygen: list(markdown = TRUE, roclets = c("collate","namespace", "rd",
"vignette" ))
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
65 changes: 45 additions & 20 deletions R/read_slf.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,19 @@ read_slf <- function(
# but the column wasn't selected we need to add it (and remove later)
remove_partnership_var <- FALSE
remove_recid_var <- FALSE
if (!is.null(col_select)) {
if (!is.null(partnerships) &
!("hscp2018" %in% col_select)) {
col_select <- c(col_select, "hscp2018")
if (!rlang::quo_is_null(rlang::enquo(col_select))) {
if (!is.null(partnerships) &&
stringr::str_detect(rlang::quo_text(rlang::enquo(col_select)),
stringr::coll("hscp2018"),
negate = TRUE
)) {
remove_partnership_var <- TRUE
}
if (!is.null(recids) & file_version == "episode" &
!("recid" %in% col_select)) {
col_select <- c(col_select, "recid")
if (!is.null(recids) && file_version == "episode" &&
stringr::str_detect(rlang::quo_text(rlang::enquo(col_select)),
stringr::coll("recid"),
negate = TRUE
)) {
remove_recid_var <- TRUE
}
}
Expand All @@ -71,27 +75,48 @@ read_slf <- function(
function(file_path) {
slf_table <- arrow::read_parquet(
file = file_path,
col_select = !!col_select,
col_select = {{ col_select }},
as_data_frame = FALSE
)

if (!is.null(recids)) {
if (!is.null(partnerships)) {
if (remove_partnership_var) {
slf_table <- cbind(
slf_table,
arrow::read_parquet(
file = file_path,
col_select = "hscp2018",
as_data_frame = FALSE
)
)
}
slf_table <- dplyr::filter(
slf_table,
.data$recid %in% recids
.data$hscp2018 %in% partnerships
)
if (remove_partnership_var) {
slf_table <- dplyr::select(slf_table, -"hscp2018")
}
}
if (!is.null(partnerships)) {

if (!is.null(recids)) {
if (remove_recid_var) {
slf_table <- cbind(
slf_table,
arrow::read_parquet(
file = file_path,
col_select = "recid",
as_data_frame = FALSE
)
)
}
slf_table <- dplyr::filter(
slf_table,
.data$hscp2018 %in% partnerships
.data$recid %in% recids
)
}
if (remove_partnership_var) {
slf_table <- dplyr::select(slf_table, -"hscp2018")
}
if (remove_recid_var) {
slf_table <- dplyr::select(slf_table, -"recid")
if (remove_recid_var) {
slf_table <- dplyr::select(slf_table, -"recid")
}
}

return(slf_table)
Expand Down Expand Up @@ -149,7 +174,7 @@ read_slf_episode <- function(
return(
read_slf(
year = year,
col_select = unique(col_select),
col_select = {{ col_select }},
file_version = "episode",
partnerships = unique(partnerships),
recids = unique(recids),
Expand Down Expand Up @@ -193,7 +218,7 @@ read_slf_individual <- function(
return(
read_slf(
year = year,
col_select = unique(col_select),
col_select = {{ col_select }},
file_version = "individual",
partnerships = unique(partnerships),
as_data_frame = as_data_frame,
Expand Down
2 changes: 1 addition & 1 deletion man/read_slf.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/read_slf_episode.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/read_slf_individual.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 6 additions & 2 deletions tests/testthat/test-multiple_years.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ test_that("read multiple years works for individual file", {
indiv <- read_slf_individual(c("1718", "1819"),
col_select = c("year", "anon_chi")
) %>%
dplyr::slice_sample(n = 100)
dplyr::group_by(year) %>%
dplyr::slice_sample(n = 50) %>%
dplyr::ungroup()

# Test for anything odd
expect_s3_class(indiv, "tbl_df")
Expand All @@ -34,7 +36,9 @@ test_that("read multiple years works for episode file", {
ep <- read_slf_episode(c("1718", "1819"),
col_select = c("year", "anon_chi")
) %>%
dplyr::slice_sample(n = 100)
dplyr::group_by(year) %>%
dplyr::slice_sample(n = 50) %>%
dplyr::ungroup()

# Test for anything odd
expect_s3_class(ep, "tbl_df")
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-read_slf_episode.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,6 @@ for (year in years) {

test_that("Episode file has the expected number of variables", {
# Test for correct number of variables (will need updating)
expect_length(ep_file, 241)
expect_length(ep_file, 251)
})
}
2 changes: 1 addition & 1 deletion tests/testthat/test-read_slf_individual.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ test_that("Reads individual file correctly", {
expect_equal(nrow(indiv_file), 100)

# Test for correct number of variables (will need updating)
expect_length(indiv_file, 180)
expect_length(indiv_file, 193)
}
})

Expand Down
55 changes: 55 additions & 0 deletions tests/testthat/test-tidyselect_columns.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Skip on CI. NOTE(review): called at the top level (outside any `test_that()`),
# so this is presumably meant to skip every test in this file - likely because
# the SLF data files are not available on CI; confirm.
skip_on_ci()


test_that("tidyselect helpers work for column selection in the episode file", {
  # Names expected back from the episode file for `starts_with("dd")`
  dd_cols <- c("dd_responsible_lca", "dd_quality")

  # A selection helper used on its own
  expect_named(
    read_slf_episode(
      "1920",
      col_select = dplyr::starts_with("dd")
    ),
    dd_cols
  )

  # A helper combined with a column given by name
  expect_named(
    read_slf_episode(
      "1920",
      col_select = c("year", dplyr::starts_with("dd"))
    ),
    c("year", dd_cols)
  )

  # A negated helper: no expected names are supplied, so this only checks
  # that the result has names
  expect_named(
    read_slf_episode(
      "1920",
      col_select = !dplyr::matches("[aeiou]")
    )
  )
})

test_that("col_select works when columns are added", {
  # Filter values used below; neither filter column is named in `col_select`
  recid_filter <- "DD"
  partnership_filter <- "S37000001"

  # Filtering on recid must not leave any extra column in the result
  expect_named(
    read_slf_episode(
      "1920",
      col_select = "year",
      recids = recid_filter
    ),
    "year"
  )

  # Same check for the partnership filter
  expect_named(
    read_slf_episode(
      "1920",
      col_select = "year",
      partnerships = partnership_filter
    ),
    "year"
  )

  # Filters combined with a tidyselect helper: no expected names are
  # supplied, so these only check that a named result is returned
  expect_named(
    read_slf_episode(
      "1920",
      col_select = c("year", dplyr::contains("dd")),
      recids = recid_filter
    )
  )
  expect_named(
    read_slf_episode(
      "1920",
      col_select = c("year", dplyr::contains("cij")),
      partnerships = partnership_filter
    )
  )
})

test_that("tidyselect helpers work for column selection in the individual file", {
  # Names expected back from the individual file for `starts_with("dd")`
  dd_cols <- c(
    "dd_noncode9_episodes",
    "dd_noncode9_beddays",
    "dd_code9_episodes",
    "dd_code9_beddays"
  )

  # A selection helper used on its own
  expect_named(
    read_slf_individual(
      "1920",
      col_select = dplyr::starts_with("dd")
    ),
    dd_cols
  )

  # A helper combined with a column given by name
  expect_named(
    read_slf_individual(
      "1920",
      col_select = c("year", dplyr::starts_with("dd"))
    ),
    c("year", dd_cols)
  )

  # A negated helper: no expected names are supplied, so this only checks
  # that the result has names
  expect_named(
    read_slf_individual(
      "1920",
      col_select = !dplyr::matches("[aeiou]")
    )
  )
})

0 comments on commit 0cb714f

Please sign in to comment.