-
Notifications
You must be signed in to change notification settings - Fork 0
/
6_run_interpolated.R
72 lines (66 loc) · 2.48 KB
/
6_run_interpolated.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Setup: BigQuery access and the SQL query templates ------------------------
library(bigrquery)
library(dplyr)

# Interactive OAuth flow; must complete before any BigQuery calls below.
bigrquery::bq_auth()

project_id <- "world-fishing-827"
dataset <- "scratch_andrea_ttl100"

# We have the new track file.
# NOTE(review): `tracks` is assumed to be loaded by an earlier script/chunk
# (its `datetime` column supplies the DATES vector used below) -- confirm.
tracks

# David was running the interpolation for each date: one query per day.
# Notice the {.x} placeholder in the SQL, filled in later via glue::glue().
interpolated_albatross <- readr::read_file("queries/05_david_interpolated_positions_Nov.sql")
interpolated_AIS <- readr::read_file("queries/06_david_AIS_interpolated_Nov.sql")

# Start with the albatross query template.
interpolated <- interpolated_albatross
# Run the interpolation query for one date and write the result into the
# matching daily partition of `orben_interpolate`.
#
# @param .x A Date, or a "YYYY-MM-DD" string (see the commented single-day
#   test call below); substituted into the SQL via the {.x} placeholder in
#   the `interpolated` query template.
# @return The bigrquery job object, invisibly.
create_table <- function(.x) {
  # Fill the {.x} placeholder; glue() looks up .x in this function's frame.
  query <- glue::glue(interpolated)
  # The "$YYYYMMDD" decorator targets a single partition of the table.
  # format(as.Date(.x), ...) handles both Date and character input, unlike
  # calling the S3 method format.Date() directly.
  table_name <- paste0(
    project_id, ".", dataset, ".orben_interpolate$",
    format(as.Date(.x), "%Y%m%d")
  )
  # Execute the query with DAY partitioning on the destination table.
  job <- bq_project_query(
    project_id,
    query = query,
    destination_table = table_name,
    configuration = list(
      query = list(
        # field = "time" errored previously; partitioning by DAY alone works.
        time_partitioning = list(type = "DAY")
      )
    )
  )
  invisible(job)
}
# create_table("2024-06-09")  # single-day test into interpolate_test;
# ran this afterwards because that day was missing.
# Two datasets in BQ: interpolate (larger) and albatross_interpolate (smaller).
# Will share with Rachael from the first and redo the loop in January.

# Execute the query in parallel across the list of dates.
library(furrr)

# Leave two cores free for the OS, but never request fewer than one worker
# (detectCores() - 2 can be <= 0 on small machines).
plan(multisession, workers = max(1L, parallel::detectCores() - 2L))

# One unique, sorted calendar date per track observation day.
DATES <- lubridate::date(tracks$datetime) %>% unique() %>% sort()
furrr::future_map(DATES, ~create_table(.x))

# Shut the workers down once the run is complete.
plan(sequential)
### AIS
# Switch the template so create_table() now runs the AIS interpolation SQL.
interpolated <- interpolated_AIS

# Run the AIS interpolation query for one date and write the result into the
# matching daily partition of `orben_AIS_interpolate`.
#
# @param .x A Date, or a "YYYY-MM-DD" string; substituted into the SQL via
#   the {.x} placeholder in the `interpolated` query template.
# @return The bigrquery job object, invisibly.
create_table <- function(.x) {
  # Fill the {.x} placeholder; glue() looks up .x in this function's frame.
  query <- glue::glue(interpolated)
  # The "$YYYYMMDD" decorator targets a single partition of the table.
  # format(as.Date(.x), ...) handles both Date and character input, unlike
  # calling the S3 method format.Date() directly.
  table_name <- paste0(
    project_id, ".", dataset, ".orben_AIS_interpolate$",
    format(as.Date(.x), "%Y%m%d")
  )
  # Execute the query with DAY partitioning on the destination table.
  job <- bq_project_query(
    project_id,
    query = query,
    destination_table = table_name,
    configuration = list(
      query = list(
        time_partitioning = list(type = "DAY")
      )
    )
  )
  invisible(job)
}
# Repeat the parallel per-date run for the AIS interpolation query.
# Guard against detectCores() - 2 being <= 0 on small machines.
plan(multisession, workers = max(1L, parallel::detectCores() - 2L))
furrr::future_map(DATES, ~create_table(.x))
plan(sequential)