Skip to content

Commit

Permalink
escape pacman
Browse files: browse the repository at this point in the history
  • Loading branch information
JBGruber committed Oct 26, 2023
1 parent d3d6b6d commit 1984544
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 6 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: paperboy
Title: Comprehensive Collection of News Media Scrapers
Version: 0.0.5.9000
Date: 2023-10-25
Date: 2023-10-26
Authors@R:
person(given = "Johannes B.",
family = "Gruber",
Expand Down
12 changes: 8 additions & 4 deletions R/deliver.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,20 @@ pb_deliver.data.frame <- function(x, try_default = TRUE, verbose = NULL, ...) {
pb <- NULL
if (verbose) {
oldstyle <- getOption("cli.progress_bar_style")
oldstyle_ascii <- getOption("cli.progress_bar_style_ascii")
options(cli.progress_bar_style = list(
current = cli::col_yellow(""),
current = cli::col_yellow("\u15E7"),
complete = cli::col_grey("\u2010"),
incomplete = cli::col_red("\u2022")
))
options(cli.progress_bar_style_ascii = list(
current = cli::col_yellow("C"),
complete = cli::col_grey("-"),
incomplete = cli::col_grey("o")
))
pb <- cli::cli_progress_bar("Parsing raw html:", total = nrow(x))
}




out <- purrr::list_rbind(purrr::map(domains, function(u) {

class(u) <- c(
Expand All @@ -96,6 +99,7 @@ pb_deliver.data.frame <- function(x, try_default = TRUE, verbose = NULL, ...) {
if (verbose) {
cli::cli_progress_done()
options(cli.progress_bar_style = oldstyle)
options(cli.progress_bar_style_ascii = oldstyle_ascii)
}

# tell user about warnings
Expand Down
3 changes: 2 additions & 1 deletion R/deliver_parlamentnilisty_cz.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
pb_deliver_paper.parlamentnilisty_cz <- function(x, verbose = NULL, pb, ...) {

# raw html is stored in column content_raw
html <- rvest::read_html(x$content_raw)
html <- rvest::read_html(charToRaw(enc2utf8(x$content_raw)))
pb_tick(x, verbose, pb)

# data about the article is nicely stored in a json string
data <- html %>%
rvest::html_elements("[type=\"application/ld+json\"]") %>%
rvest::html_text() %>%
gsub("[\r\n]", "", .) %>% # sometimes uses illegal line breaks
lapply(jsonlite::fromJSON, simplifyVector = FALSE)

# usually there are more than one,
Expand Down

0 comments on commit 1984544

Please sign in to comment.