diff --git a/DESCRIPTION b/DESCRIPTION index 094cc28..d2350e4 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: paperboy Title: Comprehensive Collection of News Media Scrapers Version: 0.0.5.9000 -Date: 2023-11-21 +Date: 2023-11-30 Authors@R: person(given = "Johannes B.", family = "Gruber", diff --git a/R/deliver_default.R b/R/deliver_default.R index 260f70e..3408df7 100644 --- a/R/deliver_default.R +++ b/R/deliver_default.R @@ -6,7 +6,7 @@ pb_deliver_paper.default <- function(x, verbose = NULL, pb, ...) { if (methods::is(html, "try-error")) { # TODO: work-around for weird encoding issues tmp <- tempfile(fileext = ".html") - writeLines(x$content_raw, tmp) + writeLines(gsub("[^ -~]+", "", x$content_raw , useBytes = TRUE), tmp) html <- rvest::read_html(tmp) } warn_once(x$domain) diff --git a/tests/testthat/test-deliver.R b/tests/testthat/test-deliver.R index 86a8bb2..f69de97 100644 --- a/tests/testthat/test-deliver.R +++ b/tests/testthat/test-deliver.R @@ -46,11 +46,3 @@ test_that("Test huffpost scraper", { }, c("tbl_df", "tbl", "data.frame", "9", "1")) }) -test_that("Local test data can be parsed", { - skip_if(!file.exists("test_data.rds")) - test_data <- readRDS("test_data.rds") - expect_equal({ - out <- pb_deliver(test_data) - c(class(out), ncol(out), nrow(out)) - }, c("tbl_df", "tbl", "data.frame", "9", "80")) -})