This repository has been archived by the owner on Jun 27, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
_targets.R
249 lines (227 loc) · 9.53 KB
/
_targets.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
# Created by use_targets().
# Follow the comments below to fill in this target script.
# Then follow the manual to check and run the pipeline:
# https://books.ropensci.org/targets/walkthrough.html#inspect-the-pipeline
# Load packages required to define the pipeline:
library(targets)
library(tarchetypes)
library(geotargets)
library(tidyr)
library(fs)
library(quarto)
#Debug s3 bucket issues
# options(paws.log_level = 4L, paws.log_file = "paws_log.txt")
# Pipeline-wide options ----------------------------------------------------
# `packages` are attached in every worker/process before a target runs.
# Storage is local by default; uncomment one of the cloud configurations
# below (and `repository = "aws"`) to persist the targets store in an
# S3-compatible bucket instead.
tar_option_set(
  # Runtime dependencies shared across targets
  packages = c(
    "ncdf4", "terra", "fs", "purrr", "units", "tidyterra", "ggplot2", "sf",
    "maps", "tidyr", "dplyr", "stringr", "stars", "magick", "ggridges",
    "ggrastr", "svglite", "ggtext", "ggthemes", "KernSmooth", "patchwork",
    "tibble", "amerifluxr"
  )
  #
  # For distributed computing in tar_make(), supply a {crew} controller
  # as discussed at https://books.ropensci.org/targets/crew.html.
  # Choose a controller that suits your needs. For example, the following
  # sets a controller that scales up to a maximum of two workers
  # which run as local R processes. Each worker launches when there is work
  # to do and exits if 60 seconds pass with no tasks to run.
  #
  # controller = crew::crew_controller_local(workers = 3, seconds_idle = 60),
  #
  # Use an s3 bucket for the targets store:
  # repository = "aws", # comment out or change to "local" to store targets locally on disk
  ## Jetstream2 bucket:
  # resources = tar_resources(
  #   aws = tar_resources_aws(
  #     bucket = "test123456", #TODO: Maybe create a new bucket with Julian's help eventually
  #     prefix = "carbon_stores",
  #     endpoint = "https://js2.jetstream-cloud.org:8001",
  #     max_tries = 10
  #   )
  # )
  ## Wasabi bucket:
  # resources = tar_resources(
  #   aws = tar_resources_aws(
  #     bucket = "targets-test",
  #     prefix = "carbon_stores",
  #     endpoint = "https://s3.us-central-1.wasabisys.com"
  #   )
  # )
)
# Source every .R file in the R/ folder so the custom functions referenced by
# the targets below (read_clean_*(), plot_*(), make_*(), etc.) are in scope.
tar_source()
# source("other_functions.R") # Source other scripts as needed.
# Pipeline definition. Each entry declares a target; {targets} infers the
# dependency graph from the symbols each command references.
tar_plan(
  # Read shapefiles ---------------------------------------
  # Basemap polygons used for cropping rasters and for plotting.
  conus = maps::map("usa", plot = FALSE, fill = TRUE) |> st_as_sf(),
  ca_az = make_ca_az_sf(),
  # Read and harmonize 2010 AGB data products ------------
  # Initially cropped to CONUS when read in, and transformed to common CRS (but
  # not common resolution).
  # By default, tar_file() with repository="aws" uploads the files to the s3
  # bucket. Since they're already on an attached volume on Jetstream2, I use
  # repository = "local" to prevent this. Logic above changes the path to the
  # correct place depending on where this is run.
  # Each product follows the same two-step pattern: a file target that tracks
  # the raw data on disk, then a tar_terra_rast() target holding the cleaned
  # raster (cropped/reprojected by a read_clean_*() helper in R/).
  tar_file(esa_files, dir_ls("data/rasters/ESA_CCI/", glob = "*2010-fv4.0.tif*"), repository = "local"),
  tar_terra_rast(esa_agb, read_clean_esa(esa_files, conus)),
  tar_file(chopping_file, "data/rasters/Chopping/MISR_agb_estimates_20002021.tif", repository = "local"),
  tar_terra_rast(chopping_agb, read_clean_chopping(chopping_file, esa_agb, conus)),
  tar_file(liu_file, "data/rasters/Liu/Aboveground_Carbon_1993_2012.nc", repository = "local"),
  tar_terra_rast(liu_agb, read_clean_liu(liu_file, esa_agb, conus)),
  tar_file(xu_file, "data/rasters/Xu/test10a_cd_ab_pred_corr_2000_2019_v2.tif", repository = "local"),
  tar_terra_rast(xu_agb, read_clean_xu(xu_file, esa_agb, conus)),
  # RAP product currently excluded from the pipeline:
  # tar_file(rap_file, "data/rasters/RAP/vegetation-biomass-v3-2010.tif"),
  # tar_terra_rast(rap_agb, read_clean_rap(rap_file, esa_agb, conus)),
  tar_file(ltgnn_files, fs::dir_ls("data/rasters/LT_GNN", glob = "*.zip"), repository = "local"),
  tar_terra_rast(ltgnn_agb, read_clean_lt_gnn(ltgnn_files, esa_agb, conus)),
  tar_file(menlove_dir, "data/rasters/Menlove/data/", repository = "local"),
  tar_terra_rast(menlove_agb, read_clean_menlove(menlove_dir, esa_agb, conus)),
  tar_file(gedi_file, "data/rasters/GEDI_L4B_v2.1/data/GEDI04_B_MW019MW223_02_002_02_R01000M_MU.tif",
           repository = "local"),
  tar_terra_rast(gedi_agb, read_clean_gedi(gedi_file, esa_agb, conus)),
  # Extract data for every NEON & Ameriflux site ----------------------------
  tar_file(neon_kmz, "data/shapefiles/NEON_Field_Sites_KMZ_v18_Mar2023.kmz"),
  tar_file(neon_field_path, "data/shapefiles/Field_Sampling_Boundaries_2020/"),
  # TODO switch to tar_sf() once it exists?
  tar_target(
    site_locs,
    get_site_locs(neon_kmz, neon_field_path)
  ),
  # Static branching: one `sites_<product>` target per AGB product, each
  # extracting per-site values from that product's raster.
  tar_map(
    values = tibble(
      product = rlang::syms(
        c("esa_agb", "chopping_agb", "gedi_agb", "liu_agb", "ltgnn_agb", "menlove_agb", "xu_agb")
      )
    ),
    tar_target(
      sites,
      extract_agb_site(product, site_locs)
    )
  ),
  # Collect the per-product extractions, pivot wider, join to site_locs, and
  # write out a CSV (tracked as a file target).
  tar_target(
    sites_wide_csv,
    pivot_sites(sites_esa_agb, sites_chopping_agb, sites_gedi_agb, sites_liu_agb,
                sites_ltgnn_agb, sites_menlove_agb, sites_xu_agb, site_locs = site_locs),
    format = "file"
  ),
  # Stack em! ---------------------------------------------------------------
  # Project to common resolution, crop to CA and AZ, and create a multi-layer
  # raster stack (one layer per product); ignoring RAP for the moment.
  tar_terra_rast(
    agb_stack,
    make_agb_stack(chopping_agb, liu_agb, xu_agb, ltgnn_agb, menlove_agb, gedi_agb,
                   esa = esa_agb, region = ca_az)
  ),
  # Plots -------------------------------------------------------------------
  # Boundary shapefiles for the plotting subsets.
  tar_file(srer_dir, "data/shapefiles/srerboundary/", repository = "local"),
  tar_file(pima_dir, "data/shapefiles/Pima_County_Boundary/", repository = "local"),
  # Dynamic branching example. Might be more useful when there are a lot more subsets
  # tar_target(subsets,
  #            make_shape_list(crs = st_crs(agb_stack), srer_dir = srer_dir, pima_dir = pima_dir),
  #            iteration = "list"),
  # tar_target(test, ext(crop(agb_stack, subsets))[1], pattern = map(subsets)),
  # Use static branching to make all the maps of all the subsets.
  # Each subset polygon is transformed to the stack's CRS and tagged with a
  # `subset` label column used downstream for facet/plot titles.
  az = maps::map("state", "arizona", plot = FALSE, fill = TRUE) |>
    st_as_sf() |>
    st_transform(st_crs(agb_stack)) |>
    mutate(subset = "AZ"),
  ca = maps::map("state", "california", plot = FALSE, fill = TRUE) |>
    st_as_sf() |>
    st_transform(st_crs(agb_stack)) |>
    st_make_valid() |>
    mutate(subset = "CA"),
  srer = st_read(srer_dir) |>
    st_transform(st_crs(agb_stack)) |>
    mutate(subset = "SRER"),
  pima = st_read(pima_dir) |>
    st_transform(st_crs(agb_stack)) |>
    mutate(subset = "Pima County"),
  # Static branching over subset x file extension: 4 subsets x {png, pdf}
  # yields one target per combination for each of the three map types.
  tar_map(
    values = tidyr::expand_grid(
      subset = rlang::syms(c("az", "ca", "srer", "pima")),
      file_ext = c("png", "pdf")
    ),
    # Maps faceted by data product
    tar_target(
      agb_map,
      plot_agb_map(agb_stack, subset, downsample = TRUE, ext = file_ext),
      format = "file"
    ),
    # Maps of median AGB across products
    tar_target(
      median_map,
      plot_median_map(agb_stack, subset, downsample = FALSE, ext = file_ext, height = 2),
      format = "file"
    ),
    # Maps of SD across products
    tar_target(
      sd_map,
      plot_sd_map(agb_stack, subset, downsample = FALSE, ext = file_ext, height = 2),
      format = "file"
    )
  ),
  # Density ridge plots
  #TODO this would be faster if the plots were made once and saved twice. Don't have the same limitations as geom_spatraster where you can't save the resulting ggplot objects as targets.
  #TODO make these plots using data in original resolution?
  tar_map(
    values = list(ext = "png"), #for prototyping
    # values = list(ext = c("png", "pdf")), #uncomment to produce publication quality figures
    tar_target(
      ridge_az,
      plot_agb_ridges(agb_stack, az,
                      filename = paste("agb_density_az", ext, sep = "."),
                      height = 2, width = 4.2),
      format = "file"
    ),
    tar_target(
      ridge_ca,
      plot_agb_ridges(agb_stack, ca,
                      break_x = 50,
                      filename = paste("agb_density_ca", ext, sep = "."),
                      height = 2, width = 4.2),
      format = "file"
    ),
    tar_target(
      ridge_pima,
      plot_agb_ridges(agb_stack, pima,
                      break_x = c(30, 50),
                      filename = paste("agb_density_pima", ext, sep = "."),
                      height = 2, width = 4.2),
      format = "file"
    ),
    tar_target(
      ridge_srer,
      plot_agb_ridges(agb_stack, srer,
                      break_plot = FALSE,
                      filename = paste("agb_density_srer", ext, sep = "."),
                      height = 2, width = 4.2),
      format = "file"
    )
  ),
  # Summary statistics: dynamic branching over the named list of subset
  # polygons, one calc_summary() branch per subset.
  tar_target(subsets,
             list("AZ" = az, "CA" = ca, "SRER" = srer, "Pima County" = pima),
             iteration = "list"),
  tar_target(
    summary_stats,
    calc_summary(agb_stack, subsets),
    pattern = map(subsets)
  ),
  # Scatter plots against ESA, just for Arizona for now.
  # agb_df_az: the AZ-cropped stack as a tibble (one column per product).
  tar_target(agb_df_az, as_tibble(as.data.frame(crop(agb_stack, az, mask = TRUE, overwrite = TRUE)))),
  # All product columns except the ESA CCI reference column.
  tar_target(plot_comparisons, colnames(agb_df_az)[colnames(agb_df_az)!="ESA CCI"]),
  # One scatter plot per comparison product via dynamic branching.
  tar_target(
    scatter_plots,
    plot_scatter(
      agb_df_az,
      comparison = plot_comparisons,
      height = 2,
      width = 2
    ),
    pattern = map(plot_comparisons),
    format = "file"
  ),
  tar_target(zip_scatter_plots, zip_plots(scatter_plots, "docs/fig/scatter.zip"), format = "file"),
  # Render docs -------------------------------------------------------------
  # Report
  tar_quarto(report, "docs/report.qmd", working_directory = "docs"),
  # README (cue = "always" so it re-renders on every tar_make())
  tar_quarto(readme, "README.qmd", cue = tar_cue(mode = "always"))
)