From bc7ba880e823d00e679a751d04f4198d1af6748a Mon Sep 17 00:00:00 2001
From: schochastics
Date: Wed, 16 Oct 2024 10:21:20 +0200
Subject: [PATCH] added t-online.de

---
 NAMESPACE               |  1 +
 R/deliver_t_online_de.R | 45 +++++++++++++++++++++++++++++++++++++++++++++
 inst/status.csv         |  1 +
 3 files changed, 47 insertions(+)
 create mode 100644 R/deliver_t_online_de.R

diff --git a/NAMESPACE b/NAMESPACE
index b296dee..5f161f9 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -64,6 +64,7 @@ S3method(pb_deliver_paper,sky_com)
 S3method(pb_deliver_paper,spiegel_de)
 S3method(pb_deliver_paper,stern_de)
 S3method(pb_deliver_paper,sueddeutsche_de)
+S3method(pb_deliver_paper,t_online_de)
 S3method(pb_deliver_paper,tagesschau_de)
 S3method(pb_deliver_paper,telegraaf_nl)
 S3method(pb_deliver_paper,telegraph_co_uk)
diff --git a/R/deliver_t_online_de.R b/R/deliver_t_online_de.R
new file mode 100644
index 0000000..05bdb8a
--- /dev/null
+++ b/R/deliver_t_online_de.R
@@ -0,0 +1,45 @@
+# Parser for articles from t-online.de.
+#
+# Metadata (publication date, headline, authors) is read from the first
+# JSON-LD script of the page; the article text is collected from the
+# paragraphs inside the "ArticleBody.StreamLayout" container.
+#
+# x:       scraped data, passed to pb_tick(); raw html is in x$content_raw.
+# verbose: passed on to pb_tick().
+# pb:      passed on to pb_tick().
+# ...:     not used by this method.
+#
+# Returns the result of s_n_list() called with datetime, author, headline,
+# text and the JSON-LD entry the metadata was taken from.
+#' @export
+pb_deliver_paper.t_online_de <- function(x, verbose = NULL, pb, ...) {
+  pb_tick(x, verbose, pb)
+  # raw html is stored in column content_raw
+  html <- rvest::read_html(x$content_raw)
+
+  # only the first JSON-LD script of the page is parsed
+  ld_scripts <- rvest::html_elements(
+    html, "script[type = \"application/ld+json\"] "
+  )
+  json_txt <- rvest::html_text(ld_scripts[1])
+  json_df <- jsonlite::fromJSON(json_txt)
+  # keep the first row of @graph (presumably the article entry; verify)
+  json_df <- json_df$`@graph`[1, ]
+
+  datetime <- lubridate::as_datetime(json_df$datePublished)
+  headline <- json_df$headline
+  # several author names are collapsed into one comma-separated string
+  author <- toString(json_df$author[[1]]$name)
+  text <- html %>%
+    rvest::html_elements("div[data-testid=\"ArticleBody.StreamLayout\"] p") %>%
+    rvest::html_text2() %>%
+    paste(collapse = "\n")
+
+  s_n_list(
+    datetime,
+    author,
+    headline,
+    text,
+    json_df # dumping the whole json data of an article
+  )
+}
diff --git a/inst/status.csv b/inst/status.csv
index 6cf23f2..b1ed841 100644
--- a/inst/status.csv
+++ b/inst/status.csv
@@ -82,6 +82,7 @@
 "thismorningwithgordondeal.com","![](https://img.shields.io/badge/status-requested-lightgrey)","","[#1](https://github.com/JBGruber/paperboy/issues/1)",NA
 "time.com","![](https://img.shields.io/badge/status-requested-lightgrey)","","[#1](https://github.com/JBGruber/paperboy/issues/1)",NA
 "tribpub.com","![](https://img.shields.io/badge/status-requested-lightgrey)","","[#1](https://github.com/JBGruber/paperboy/issues/1)",NA
+"t-online.de","![](https://img.shields.io/badge/status-gold-%23ffd700.svg)","[@schochastics](https://github.com/schochastics)","[#23](https://github.com/JBGruber/paperboy/issues/23)","https://www.t-online.de/nachrichten/feed.rss"
 "us.cnn.com","![](https://img.shields.io/badge/status-gold-%23ffd700.svg)","[@JBGruber](https://github.com/JBGruber/)","",NA
 "usatoday.com","![](https://img.shields.io/badge/status-gold-%23ffd700.svg)","[@JBGruber](https://github.com/JBGruber/)","",NA
 "volkskrant.nl","![](https://img.shields.io/badge/status-gold-%23ffd700.svg)","[@JBGruber](https://github.com/JBGruber/)","","https://www.volkskrant.nl/rss.xml"