From 21ac7979e5aa6e8eaa95cac2be8157afd18c6126 Mon Sep 17 00:00:00 2001
From: JBGruber <JohannesB.Gruber@gmail.com>
Date: Mon, 9 Oct 2023 17:56:07 +0200
Subject: [PATCH] bug fixes in backend functions

---
 R/deliver.R           |  2 +-
 R/deliver_cnet_com.R  | 37 +++++++++++++++++++++++++++----------
 R/utils.R             |  3 ++-
 R/utils_dev.R         | 17 ++++++++++-------
 man/use_new_parser.Rd |  2 ++
 5 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/R/deliver.R b/R/deliver.R
index 9b65d58..4c3f907 100755
--- a/R/deliver.R
+++ b/R/deliver.R
@@ -45,7 +45,7 @@ pb_deliver.data.frame <- function(x, verbose = NULL, ...) {
   bad_status <- x$status != 200L
   x <- x[!bad_status, ]
 
-  if (verbose && sum(bad_status) > 0)
+  if (isTRUE(verbose) && isTRUE(sum(bad_status) > 0))
     cli::cli_alert_warning("{sum(bad_status)} URL{?s} removed due to bad status.")
 
   domains <- split(x, x$domain, drop = TRUE)
diff --git a/R/deliver_cnet_com.R b/R/deliver_cnet_com.R
index 9c94f10..82bf895 100644
--- a/R/deliver_cnet_com.R
+++ b/R/deliver_cnet_com.R
@@ -18,16 +18,33 @@ pb_deliver_paper.cnet_com <- function(x, verbose = NULL, pb, ...) {
       as.POSIXct()
   }
 
-  # headline
-  headline <- html %>%
-    rvest::html_elements("[property=\"og:title\"]") %>%
-    rvest::html_attr("content")
-
-  # author
-  author <- html %>%
-    rvest::html_elements(".c-globalAuthor_link,.author")  %>%
-    rvest::html_text2() %>%
-    toString()
+  if (condition) {
+    data <- html %>%
+      rvest::html_element("[type=\"application/ld+json\"]") %>%
+      rvest::html_text() %>%
+      jsonlite::fromJSON()
+
+    datetime <- data$datePublished %>%
+      lubridate::as_datetime()
+
+    # headline
+    headline <- data$headline
+
+    # author
+    author <- data$author$name
+
+  } else {
+    # headline
+    headline <- html %>%
+      rvest::html_elements("[property=\"og:title\"]") %>%
+      rvest::html_attr("content")
+
+    # author
+    author <- html %>%
+      rvest::html_elements(".c-globalAuthor_link,.author")  %>%
+      rvest::html_text2() %>%
+      toString()
+  }
 
   # text
   text <- html %>%
diff --git a/R/utils.R b/R/utils.R
index f29992e..3e1d507 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -145,7 +145,8 @@ warn_once <- function(id) {
 
 
 url_get_basename <- function(x) {
-  sub(adaR::ada_get_pathname(x), "", x, fixed = TRUE)
+  host <- url_get_domain(x)  # presumably the bare host of `x`; confirm in helper
+  replace(paste0("https://", host), is.na(host), NA_character_)  # NA stays NA
 }
 
 
diff --git a/R/utils_dev.R b/R/utils_dev.R
index 086d80a..08475e1 100644
--- a/R/utils_dev.R
+++ b/R/utils_dev.R
@@ -31,10 +31,12 @@
 #' @export
 #'
 #' @examples
+#' \dontrun{
 #' use_new_parser(x = "https://www.buzzfeed.com/",
 #'                author = "[@JBGruber](https://github.com/JBGruber/)",
 #'                issue = "[#1](https://github.com/JBGruber/paperboy/issues/1)",
 #'                rss = "https://www.buzzfeed.com/rss")
+#' }
 #' @md
 use_new_parser <- function(x,
                            author = "",
@@ -51,12 +53,12 @@ use_new_parser <- function(x,
   r_file <- pb_new(x)
   cli::cli_progress_done()
 
-  cli::cli_progress_step(
-    "Trying to find RSS feed",
-    msg_done = "RSS feed noted",
-    msg_failed = "No RSS feed in the usual locations. Add to inst/status.csv manually"
-  )
   if (is.null(rss)) {
+    cli::cli_progress_step(
+      "Trying to find RSS feed",
+      msg_done = "RSS feed noted",
+      msg_failed = "No RSS feed in the usual locations. Add to inst/status.csv manually"
+    )
     rss <- pb_find_rss(x)
   }
   if (rss == "") {
@@ -153,7 +155,6 @@ use_new_parser <- function(x,
       "Finalising entry in inst/status.csv",
       msg_done = "status.csv updated."
     )
-    x <- utils::head(adaR::ada_get_domain(x), 1)
     status <- utils::read.csv("inst/status.csv")
     status[status$domain == gsub("^www.", "", x), "status"] <-
       "![](https://img.shields.io/badge/status-gold-%23ffd700.svg)"
@@ -183,9 +184,11 @@ use_new_parser <- function(x,
 #' }
 pb_new <- function(np, author = "", issue = "") {
 
-  np <- utils::head(adaR::ada_get_domain(np), 1)
+  np <- utils::head(url_get_domain(np), 1)
   np_ <- classify(np)
 
+  if (is.na(np)) cli::cli_abort("invalid domain name: {np}")
+
   template <- system.file("templates", "deliver_.R", package = "paperboy") %>%
     readLines() %>%
     gsub("{{newspaper}}", np_, x = ., fixed = TRUE)
diff --git a/man/use_new_parser.Rd b/man/use_new_parser.Rd
index 0e32803..6f582ad 100644
--- a/man/use_new_parser.Rd
+++ b/man/use_new_parser.Rd
@@ -44,8 +44,10 @@ As might be obvious, not all steps can be performed in a single action.
 Rather the idea is to run the function multiple times, until all is done.
 }
 \examples{
+\dontrun{
 use_new_parser(x = "https://www.buzzfeed.com/",
                author = "[@JBGruber](https://github.com/JBGruber/)",
                issue = "[#1](https://github.com/JBGruber/paperboy/issues/1)",
                rss = "https://www.buzzfeed.com/rss")
 }
+}