-
Notifications
You must be signed in to change notification settings - Fork 0
/
3_extract_positions.R
82 lines (73 loc) · 3.46 KB
/
3_extract_positions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
library(vroom)
library(dplyr)
# all positions
vessel_pos <- vroom("orben_results.csv")
#encounter info
#encounters <- vroom("results/encounter_info.csv")
encounters <- encounters %>% mutate(bird_id = paste(species, BirdID, sep = "_"))
#original tracks
bird_pos <- vroom("data/Orben_tracks_April5_2024.csv", col_select = -1) %>%
mutate(date = lubridate::date(datetime)) %>%
mutate(bird_id = paste(species, BirdID, sep = "_"))
# join
bird_encounter <- encounters %>%
right_join(bird_pos, relationship = "many-to-many")
#I was going to split per encounter
f = bird_encounter$EncounterID
length(unique(f))
list_birdID <- split(bird_encounter, f)
bird_tracks_EncounterID <- split(bird_encounter, f)
# I decided to save in a single table
readr::write_csv(bird_encounter, "results/all_bird_positions.csv")
#list bird puede ser esa pero
# Same function than in script 2, without the plots and reprojections and without the append! Saving in a separate file per encounter
parse_results <- function(i) {
ssvid_oi <- encounters$ssvid[i]
date_oi <- encounters$date[i]
bird_oi <- encounters$BirdID[i]
## dates associated with that ssvid
vessel_positions <- vessel_pos %>%
filter(ssvid %in% ssvid_oi) %>%
filter(date >= date_oi - 1 & date <= date_oi + 1) %>%
mutate(EncounterID = i) %>%
relocate(EncounterID)
readr::write_csv(vessel_positions,
glue::glue("results/Encounter{i}_vessel_tracks_encounter.csv"))
}
### Run the function
options(future.globals.maxSize = 90 * 1024 ^ 3) # for 50 Gb RAM
library(furrr)
plan(multisession, workers = 20)
plan("default")
all_vessel_positions <- furrr::future_map(seq_along(1:nrow(encounters)),
~parse_results(.x))
#name the list to bind rows
names(all_vessel_positions) <- paste("Encounter",1:7399)
vessel_positions_one_csv <- bind_rows(all_vessel_positions)
readr::write_csv(vessel_positions_one_csv, "results/all_vessel_positions.csv")
# save directly in one csv
vessel_positions_one_csv %>% select(-nnet_score) %>% readr::write_csv("all_vessel_positions.csv")
#upload to bigquery never worked because I was trying to do it by encounter and then by day it didn´t work. next step: do it partitioning by month?
#project_id <- "world-fishing-827"
#dataset_id <- "scratch_andrea_ttl100"
# bigrquery::bq_table_upload(x = bigrquery::bq_table(project = project_id,
# dataset = dataset_id,
# table = 'orben_vessel_per_encounter'),
# values = vessel_positions_one_csv,
# configuration = list(
# query = list(
# integer_partitioning =
# list(field = "EncounterID",
# type = "INTEGER"))))
#should have been by day or month
project_id <- "world-fishing-827"
dataset_id <- "scratch_andrea_ttl100"
bigrquery::bq_table_upload(x = bigrquery::bq_table(project = project_id,
dataset = dataset_id,
table = 'orben_vessel_per_encounter'),
values = vessel_positions_one_csv,
configuration = list(
query = list(
time_partitioning =
list(field = "date",
type = "DAY"))))