diff --git a/R/collect.R b/R/collect.R index 6c43751..a9aab51 100644 --- a/R/collect.R +++ b/R/collect.R @@ -94,6 +94,9 @@ pb_collect <- function(urls, ) %>% dplyr::rename(url = urls) + # conversion removes illegal characters + out$content_raw <- iconv(out$content_raw, from = "ISO-8859-1", to = "UTF-8") + if (collect_rss) { rss <- grepl("", out$content_raw) if (any(rss)) { diff --git a/R/deliver.R b/R/deliver.R index a8df11f..121d6d6 100755 --- a/R/deliver.R +++ b/R/deliver.R @@ -70,9 +70,12 @@ pb_deliver.data.frame <- function(x, verbose = NULL, ...) { ) - out <- purrr::list_rbind(purrr::map(seq_along(u$url), function(i) - pb_deliver_paper(x = u[i, ], verbose, pb))) - return(out) + purrr::list_rbind(purrr::map(seq_along(u$url), function(i) { + cbind( + u[i, c("url", "expanded_url", "domain", "status")], + pb_deliver_paper(x = u[i, ], verbose, pb) + ) + })) })) if (verbose) { @@ -80,6 +83,13 @@ pb_deliver.data.frame <- function(x, verbose = NULL, ...) { options(cli.progress_bar_style = oldstyle) } + # tell user about warnings + ws <- mget(ls(inform_env), envir = inform_env) + if (length(ws) > 0) { + names(ws) <- rep("i", length(ws)) + cli::cli_warn(ws) + } + return(normalise_df(out)) } diff --git a/R/utils.R b/R/utils.R index 68eeef4..2864a0d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -164,8 +164,7 @@ pb_tick <- function(x, verbose, pb) { #' @noRd warn_once <- function(id) { if (is.null(inform_env[[id]])) { - cli::cli_warn("No parser for domain {.strong {id}} yet, attempting generic approach.") - inform_env[[id]] <- TRUE + inform_env[[id]] <- cli::format_warning("No parser for domain {.strong {id}} yet, attempting generic approach.") } } inform_env <- new.env()