Skip to content

Commit

Permalink
simplify deliver
Browse files Browse the repository at this point in the history
  • Loading branch information
JBGruber committed Nov 4, 2023
1 parent a0852b9 commit c365a88
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 18 deletions.
20 changes: 6 additions & 14 deletions R/deliver.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,6 @@ pb_deliver.data.frame <- function(x, try_default = TRUE, verbose = NULL, ...) {
}
}

domains <- split(x, x$domain, drop = TRUE)

pb <- NULL
if (verbose) {
oldstyle <- getOption("cli.progress_bar_style")
Expand All @@ -80,20 +78,14 @@ pb_deliver.data.frame <- function(x, try_default = TRUE, verbose = NULL, ...) {
pb <- cli::cli_progress_bar("Parsing raw html:", total = nrow(x))
}

out <- purrr::list_rbind(purrr::map(domains, function(u) {
x$class <- classify(x$domain)

class(u) <- c(
classify(utils::head(u$domain, 1)),
class(u)
out <- purrr::list_rbind(purrr::map(purrr::transpose(x), function(r) {
class(r) <- r$class
cbind(
r[c("url", "expanded_url", "domain", "status")],
pb_deliver_paper(x = r, verbose, pb)
)


purrr::list_rbind(purrr::map(seq_along(u$url), function(i) {
cbind(
u[i, c("url", "expanded_url", "domain", "status")],
pb_deliver_paper(x = u[i, ], verbose, pb)
)
}))
}))

if (verbose) {
Expand Down
8 changes: 4 additions & 4 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ pb_available <- function(...) {
#' not as it sounds: turns URLs into class-conform strings
#' @noRd
classify <- function(url) {
  # Strip a leading "www." so that data collected with older versions of
  # paperboy resolves to the same name as newer data.
  # The input is converted with as.factor() first; per the original author's
  # note this improves speed.
  url <- sub("^www\\.", "", as.factor(url))
  # Swap "." and "-" for "_" (fixed, non-regex matching) via the package
  # helper replace_all(); presumably this yields a string usable as an S3
  # class name -- confirm against replace_all()'s definition.
  replace_all(as.factor(url), c(".", "-"), rep("_", 2L), fixed = TRUE)
}


Expand Down Expand Up @@ -140,7 +140,7 @@ pb_tick <- function(x, verbose, pb) {
#' @noRd
warn_once <- function(id) {
  # Only act the first time a given id is seen: inform_env records ids that
  # have already been handled, so repeated calls with the same id do nothing.
  if (is.null(inform_env[[id]])) {
    # Store the formatted message (with its trailing newline) under the id in
    # inform_now_env; the message is not emitted here.
    inform_now_env[[id]] <- cli::format_message(
      "No parser for domain {.strong {id}} yet, attempting generic approach.\n"
    )
    # Mark the id as handled so the message is stored only once.
    inform_env[[id]] <- TRUE
  }
}
Expand Down

0 comments on commit c365a88

Please sign in to comment.