Skip to content

Commit

Permalink
Merge pull request #19 from scarnecchia/DEV-13
Browse files Browse the repository at this point in the history
DEV-13: Update tool to accommodate Oryx site changes.
  • Loading branch information
scarnecchia authored Apr 21, 2022
2 parents 23f2c45 + 7180340 commit 651ec48
Show file tree
Hide file tree
Showing 17 changed files with 7,089 additions and 60,593 deletions.
44 changes: 27 additions & 17 deletions R/scrape_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,20 @@
#'
#' @return a tibble
#' @export
scrape_data <- function() {
scrape_data <- function(country) {
if (country == "Russia") {
url <-
russia_url
} else {
url <-
ukraine_url
}

materiel <-
get_data(
"https://www.oryxspioenkop.com/2022/02/attack-on-europe-documenting-equipment.html",
"article"
) %>%
get_data(url,
"article") %>%
rvest::html_elements("li")

# Retrieve the start position of each country
country_pos <- materiel %>% rvest::html_text2() %>%
# T-64BV is the first row in the tank list and marks the beginning of each country
stringr::str_which("T-64BV")

#' Run Program
data <-
tibble::tibble(
country = character(),
Expand All @@ -31,8 +31,7 @@ scrape_data <- function() {
status <- materiel[[a]] %>% rvest::html_elements("a")
for (b in seq_along(status)) {
counter = counter + 1
data[counter, 1] <-
ifelse(a < country_pos[2], "Russia", "Ukraine")
data[counter, 1] <- country
data[counter, 2] <- extract_origin(materiel, a)
data[counter, 3] <- extract_system(materiel, a)
data[counter, 4] <- extract_status(status, b)
Expand All @@ -45,6 +44,15 @@ scrape_data <- function() {
tidyr::unnest_longer(status) %>%
dplyr::mutate(date_recorded = as.Date(lubridate::today())) %>%
trim_all()
}

create_data <- function() {
russia <- scrape_data("Russia")
ukraine <- scrape_data("Ukraine")

data <- russia %>%
dplyr::bind_rows(ukraine) %>%
dplyr::select(country, origin, system, status, url, date_recorded)

previous <- get_inputfile("totals_by_system") %>%
trim_all() %>%
Expand All @@ -61,12 +69,14 @@ scrape_data <- function() {
)) %>%
dplyr::arrange(country, system, date_recorded)

data <- check %>% dplyr::bind_rows(get_inputfile("totals_by_system")) %>%
dplyr::arrange(country, system, date_recorded)
data <- check %>% dplyr::bind_rows(previous, .id = NULL) %>%
dplyr::arrange(country, system, date_recorded)

previous %>% readr::write_csv("inputfiles/totals_by_system.csv.bak")
previous %>% readr::write_csv("inputfiles/totals_by_system.csv.bak")

data %>% readr::write_csv(glue::glue("inputfiles/totals_by_system{lubridate::today()+1}.csv"))
data %>% readr::write_csv(glue::glue(
"inputfiles/totals_by_system{lubridate::today()+1}.csv"
))

} else {
logr::put("No new data")
Expand Down
38 changes: 28 additions & 10 deletions R/totals_by_type.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,18 @@
#' @description Gets data by system category.
#'
#' @return a tibble
totals_by_type <- function() {
create_by_type <- function(country) {
if (country == "Russia") {
url <-
russia_url
} else {
url <-
ukraine_url
}

heads <-
get_data(
"https://www.oryxspioenkop.com/2022/02/attack-on-europe-documenting-equipment.html",
url,
"article div"
) %>%
rvest::html_elements("h3") %>%
Expand All @@ -15,8 +23,7 @@ totals_by_type <- function() {
heads <- heads[nchar(heads) > 0]

# Get the positions of the Russia and Ukraine headers
rus_pos <- heads %>% stringr::str_which("Russia") %>% as.double()
ukr_pos <- heads %>% stringr::str_which("Ukraine") %>% as.double()
pos <- heads %>% stringr::str_which(country) %>% as.double()

totals <- tibble(
country = character(),
Expand Down Expand Up @@ -45,20 +52,31 @@ totals_by_type <- function() {
}


totals_df <- totals %>%
country_df <- totals %>%
dplyr::mutate(
dplyr::across(destroyed:damaged, ~ as.double(tidyr::replace_na(.x, "0"))),
type_total = destroyed + abandoned + captured + damaged,
row_id = 1:n(),
country = dplyr::case_when(row_id < ukr_pos ~ "Russia",
row_id >= ukr_pos ~ "Ukraine")
row_id = 1:n()
) %>%
dplyr::mutate(country = tidyr::replace_na(country, !!!country)) %>%
select(-row_id) %>%
dplyr::mutate(
equipment = replace(equipment, rus_pos, "All Types"),
equipment = replace(equipment, ukr_pos, "All Types")
equipment = replace(equipment, pos, "All Types"),
) %>%
dplyr::rename(equipment_type = equipment)

return(country_df)
}

#' Totals by equipment type for both countries
#'
#' @description Calls `create_by_type()` once for "Russia" and once for
#'   "Ukraine", then stacks the two results row-wise, Russia first.
#'
#' @return The row-bound combination of the two per-country results.
totals_by_type <- function() {
  russia_totals <- create_by_type("Russia")
  ukraine_totals <- create_by_type("Ukraine")

  # No `.id` column is requested, so only the original columns are kept.
  dplyr::bind_rows(russia_totals, ukraine_totals)
}



4,630 changes: 2,475 additions & 2,155 deletions index.html

Large diffs are not rendered by default.

592 changes: 592 additions & 0 deletions inputfiles/daily_count_baseline2022-04-20.csv

Large diffs are not rendered by default.

2,548 changes: 0 additions & 2,548 deletions inputfiles/totals_by_system.csv

This file was deleted.

2,508 changes: 0 additions & 2,508 deletions inputfiles/totals_by_system2022-03-28.csv

This file was deleted.

2,548 changes: 0 additions & 2,548 deletions inputfiles/totals_by_system2022-03-29.csv

This file was deleted.

2,643 changes: 0 additions & 2,643 deletions inputfiles/totals_by_system2022-03-30.csv

This file was deleted.

2,736 changes: 0 additions & 2,736 deletions inputfiles/totals_by_system2022-03-31.csv

This file was deleted.

2,813 changes: 0 additions & 2,813 deletions inputfiles/totals_by_system2022-04-01.csv

This file was deleted.

2,953 changes: 0 additions & 2,953 deletions inputfiles/totals_by_system2022-04-02.csv

This file was deleted.

2,963 changes: 0 additions & 2,963 deletions inputfiles/totals_by_system2022-04-03.csv

This file was deleted.

2,963 changes: 0 additions & 2,963 deletions inputfiles/totals_by_system2022-04-04.csv

This file was deleted.

3,174 changes: 0 additions & 3,174 deletions inputfiles/totals_by_system2022-04-05.csv

This file was deleted.

30,561 changes: 0 additions & 30,561 deletions inputfiles/totals_by_system2022-04-07.csv

This file was deleted.

3,963 changes: 3,963 additions & 0 deletions inputfiles/totals_by_system2022-04-20.csv

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion scrape_oryx.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,16 @@ source("R/totals_by_type.R")
source("R/per_event.R")
source("R/daily_count.R")

russia_url <- "https://www.oryxspioenkop.com/2022/02/attack-on-europe-documenting-equipment.html"
ukraine_url <- "https://www.oryxspioenkop.com/2022/02/attack-on-europe-documenting-ukrainian.html"

tmp <-
file.path("outputfiles", sprintf("scrape_oryx_%s.log", format(Sys.time(), "%Y%m%dT%H%M%S")))
lf <- logr::log_open(tmp)
today <- format(Sys.Date(), "%Y-%m-%d")


totals_by_system <- scrape_data() %>%
totals_by_system <- create_data() %>%
readr::write_csv(., file = glue::glue("outputfiles/totals_by_system.csv"))

#' Write Event Tables
Expand Down

0 comments on commit 651ec48

Please sign in to comment.