From 4d6bfeecbabd23e591356be5adb4cd55c8087f9f Mon Sep 17 00:00:00 2001
From: schochastics
Date: Wed, 16 Oct 2024 10:04:39 +0200
Subject: [PATCH] added rp-online.de

---
 NAMESPACE                |  1 +
 R/deliver_rp_online_de.R | 28 ++++++++++++++++++++++++++++
 inst/status.csv          |  1 +
 3 files changed, 30 insertions(+)
 create mode 100644 R/deliver_rp_online_de.R

diff --git a/NAMESPACE b/NAMESPACE
index 7dab8af..b296dee 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -53,6 +53,7 @@ S3method(pb_deliver_paper,nypost_com)
 S3method(pb_deliver_paper,nytimes_com)
 S3method(pb_deliver_paper,parlamentnilisty_cz)
 S3method(pb_deliver_paper,prosieben_de)
+S3method(pb_deliver_paper,rp_online_de)
 S3method(pb_deliver_paper,rte_ie)
 S3method(pb_deliver_paper,rtl_de)
 S3method(pb_deliver_paper,rtl_nl)
diff --git a/R/deliver_rp_online_de.R b/R/deliver_rp_online_de.R
new file mode 100644
index 0000000..e8fb160
--- /dev/null
+++ b/R/deliver_rp_online_de.R
@@ -0,0 +1,28 @@
+#' @export
+pb_deliver_paper.rp_online_de <- function(x, verbose = NULL, pb, ...) {
+  pb_tick(x, verbose, pb)
+  # raw html is stored in column content_raw
+  html <- rvest::read_html(x$content_raw)
+
+  # article metadata is read from the first JSON-LD script block of the page
+  json_txt <- rvest::html_nodes(html, "script[type = \"application/ld+json\"] ")[1] %>% rvest::html_text()
+  json_df <- jsonlite::fromJSON(json_txt)
+
+  datetime <- lubridate::as_datetime(json_df$datePublished)
+  headline <- json_df$headline
+  # toString() collapses several author names into one comma-separated string
+  author <- toString(json_df$author$name)
+  # body text: the element marked as intro plus all paragraphs inside the article content container
+  text <- html %>%
+    rvest::html_nodes("strong[data-cy=\"intro\"],div[data-cy=\"article_content\"] p") %>%
+    rvest::html_text2() %>%
+    paste(collapse = "\n")
+
+  s_n_list(
+    datetime,
+    author,
+    headline,
+    text,
+    json_df # dumping the whole json data of an article
+  )
+}
diff --git a/inst/status.csv b/inst/status.csv
index 1dc9d3e..6cf23f2 100644
--- a/inst/status.csv
+++ b/inst/status.csv
@@ -59,6 +59,7 @@
 "pagesix.com","![](https://img.shields.io/badge/status-requested-lightgrey)","","[#1](https://github.com/JBGruber/paperboy/issues/1)",NA
 "parlamentnilisty.cz","![](https://img.shields.io/badge/status-gold-%23ffd700.svg)","[@JBGruber](https://github.com/JBGruber/)","","http://www.parlamentnilisty.cz/export/rss.aspx"
 "prosieben.de","![](https://img.shields.io/badge/status-gold-%23ffd700.svg)","[@schochastics](https://github.com/schochastics)","[#23](https://github.com/JBGruber/paperboy/issues/23)",NA
+"rp-online.de","![](https://img.shields.io/badge/status-gold-%23ffd700.svg)","[@schochastics](https://github.com/schochastics)","[#23](https://github.com/JBGruber/paperboy/issues/23)","https://rp-online.de/feed.rss"
 "rte.ie","![](https://img.shields.io/badge/status-gold-%23ffd700.svg)","[@JBGruber](https://github.com/JBGruber/)","","https://www.rte.ie/feeds/rss/?index=/news/"
 "rtl.de","![](https://img.shields.io/badge/status-gold-%23ffd700.svg)","[@schochastics](https://github.com/schochastics)","[#23](https://github.com/JBGruber/paperboy/issues/23)","https://www.rtl.de/rss/feed/news"
 "rtl.nl","![](https://img.shields.io/badge/status-gold-%23ffd700.svg)","[@JBGruber](https://github.com/JBGruber/)","","https://www.rtlnieuws.nl/rss.xml"