From 21ac7979e5aa6e8eaa95cac2be8157afd18c6126 Mon Sep 17 00:00:00 2001 From: JBGruber Date: Mon, 9 Oct 2023 17:56:07 +0200 Subject: [PATCH] bug fixes in backend functions --- R/deliver.R | 2 +- R/deliver_cnet_com.R | 37 +++++++++++++++++++++++++++---------- R/utils.R | 3 ++- R/utils_dev.R | 17 ++++++++++------- man/use_new_parser.Rd | 2 ++ 5 files changed, 42 insertions(+), 19 deletions(-) diff --git a/R/deliver.R b/R/deliver.R index 9b65d58..4c3f907 100755 --- a/R/deliver.R +++ b/R/deliver.R @@ -45,7 +45,7 @@ pb_deliver.data.frame <- function(x, verbose = NULL, ...) { bad_status <- x$status != 200L x <- x[!bad_status, ] - if (verbose && sum(bad_status) > 0) + if (isTRUE(verbose) && isTRUE(sum(bad_status) > 0)) cli::cli_alert_warning("{sum(bad_status)} URL{?s} removed due to bad status.") domains <- split(x, x$domain, drop = TRUE) diff --git a/R/deliver_cnet_com.R b/R/deliver_cnet_com.R index 9c94f10..82bf895 100644 --- a/R/deliver_cnet_com.R +++ b/R/deliver_cnet_com.R @@ -18,16 +18,33 @@ pb_deliver_paper.cnet_com <- function(x, verbose = NULL, pb, ...) { as.POSIXct() } - # headline - headline <- html %>% - rvest::html_elements("[property=\"og:title\"]") %>% - rvest::html_attr("content") - - # author - author <- html %>% - rvest::html_elements(".c-globalAuthor_link,.author") %>% - rvest::html_text2() %>% - toString() + if (condition) { + data <- html %>% + rvest::html_element("[type=\"application/ld+json\"]") %>% + rvest::html_text() %>% + jsonlite::fromJSON() + + datetime <- data$datePublished %>% + lubridate::as_datetime() + + # headline + headline <- data$headline + + # author + author <- data$author$name + + } else { + # headline + headline <- html %>% + rvest::html_elements("[property=\"og:title\"]") %>% + rvest::html_attr("content") + + # author + author <- html %>% + rvest::html_elements(".c-globalAuthor_link,.author") %>% + rvest::html_text2() %>% + toString() + } # text text <- html %>% diff --git a/R/utils.R b/R/utils.R index f29992e..3e1d507 100644 --- a/R/utils.R +++ b/R/utils.R @@ -145,7 +145,8 @@ warn_once <- function(id) { url_get_basename <- function(x) { - sub(adaR::ada_get_pathname(x), "", x, fixed = TRUE) + host <- url_get_domain(x) + paste0("https://", host) } diff --git a/R/utils_dev.R b/R/utils_dev.R index 086d80a..08475e1 100644 --- a/R/utils_dev.R +++ b/R/utils_dev.R @@ -31,10 +31,12 @@ #' @export #' #' @examples +#' \dontrun{ #' use_new_parser(x = "https://www.buzzfeed.com/", #' author = "[@JBGruber](https://github.com/JBGruber/)", #' issue = "[#1](https://github.com/JBGruber/paperboy/issues/1)", #' rss = "https://www.buzzfeed.com/rss") +#' } #' @md use_new_parser <- function(x, author = "", @@ -51,12 +53,12 @@ use_new_parser <- function(x, r_file <- pb_new(x) cli::cli_progress_done() - cli::cli_progress_step( - "Trying to find RSS feed", - msg_done = "RSS feed noted", - msg_failed = "No RSS feed in the usual locations. Add to inst/status.csv manually" - ) if (is.null(rss)) { + cli::cli_progress_step( + "Trying to find RSS feed", + msg_done = "RSS feed noted", + msg_failed = "No RSS feed in the usual locations. Add to inst/status.csv manually" + ) rss <- pb_find_rss(x) } if (rss == "") { @@ -153,7 +155,6 @@ use_new_parser <- function(x, "Finalising entry in inst/status.csv", msg_done = "status.csv updated." ) - x <- utils::head(adaR::ada_get_domain(x), 1) status <- utils::read.csv("inst/status.csv") status[status$domain == gsub("^www.", "", x), "status"] <- "![](https://img.shields.io/badge/status-gold-%23ffd700.svg)" @@ -183,9 +184,11 @@ use_new_parser <- function(x, #' } pb_new <- function(np, author = "", issue = "") { - np <- utils::head(adaR::ada_get_domain(np), 1) + np <- utils::head(url_get_domain(np), 1) np_ <- classify(np) + if (is.na(np)) cli::cli_abort("invalid domain name: {np}") + template <- system.file("templates", "deliver_.R", package = "paperboy") %>% readLines() %>% gsub("{{newspaper}}", np_, x = ., fixed = TRUE) diff --git a/man/use_new_parser.Rd b/man/use_new_parser.Rd index 0e32803..6f582ad 100644 --- a/man/use_new_parser.Rd +++ b/man/use_new_parser.Rd @@ -44,8 +44,10 @@ As might be obvious, not all steps can be performed in a single action. Rather the idea is to run the function multiple times, until all is done. } \examples{ +\dontrun{ use_new_parser(x = "https://www.buzzfeed.com/", author = "[@JBGruber](https://github.com/JBGruber/)", issue = "[#1](https://github.com/JBGruber/paperboy/issues/1)", rss = "https://www.buzzfeed.com/rss") } +}