# Created by use_targets().
# Follow the comments below to fill in this target script.
# Then follow the manual to check and run the pipeline:
# https://books.ropensci.org/targets/walkthrough.html#inspect-the-pipeline
# Load packages required to define the pipeline:
library(targets)
library(tarchetypes)
library(crew.cluster)
# You'll need to replace this with your HPC group name. You can discover the name by running `va` when logged into the HPC.
hpc_group <- "kristinariemer"
# Detect whether you're on the HPC and not in an Open OnDemand session (which cannot submit SLURM jobs), so the appropriate controller can be chosen below
slurm_host <- Sys.getenv("SLURM_SUBMIT_HOST")
hpc <- grepl("hpc\\.arizona\\.edu", slurm_host) & !grepl("ood", slurm_host)
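# The SLURM and local controllers defined below all write their logs to logs/.
# Creating that directory up front is a small added precaution (not something
# {targets} or crew requires here): it avoids jobs failing or logs silently
# going missing if the directory doesn't exist yet.
if (!dir.exists("logs")) dir.create("logs")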
# Set up potential controllers
controller_hpc_small <- crew.cluster::crew_controller_slurm(
  name = "hpc_small",
  workers = 2,
  seconds_idle = 300, # time until workers are shut down after going idle
  slurm_partition = "standard",
  slurm_time_minutes = 1200, # wall time for each worker
  slurm_log_output = "logs/crew_log_%A.out",
  slurm_log_error = "logs/crew_log_%A.err",
  slurm_memory_gigabytes_per_cpu = 5,
  slurm_cpus_per_task = 2, # 2 CPUs x 5 GB = 10 GB RAM total
  script_lines = c(
    paste0("#SBATCH --account ", hpc_group),
    "module load R"
    # add additional lines to the SLURM job script as necessary here
  )
)
controller_hpc_large <- crew.cluster::crew_controller_slurm(
  name = "hpc_large",
  workers = 2,
  seconds_idle = 500,
  slurm_partition = "standard",
  slurm_time_minutes = 2000,
  slurm_log_output = "logs/crew_log_%A.out",
  slurm_log_error = "logs/crew_log_%A.err",
  slurm_memory_gigabytes_per_cpu = 5,
  slurm_cpus_per_task = 4, # 4 CPUs x 5 GB = 20 GB RAM total
  script_lines = c(
    paste0("#SBATCH --account ", hpc_group),
    "module load R"
    # add additional lines to the SLURM job script as necessary here
  )
)
controller_local <- crew::crew_controller_local(
  name = "local",
  workers = 4,
  seconds_idle = 60,
  local_log_directory = "logs/"
)
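# 4 local workers is a reasonable default for a laptop or desktop; if you would
# rather scale to the machine you're on, something like
# parallel::detectCores(logical = FALSE) - 1 is a common (though not required)
# choice for the `workers` argument above.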
# Set target options:
tar_option_set(
  packages = c("tibble"), # packages that your targets need for their tasks
  controller = crew::crew_controller_group(
    controller_local,
    controller_hpc_large,
    controller_hpc_small
  ),
  resources = tar_resources(
    # if on HPC, use the "hpc_small" controller by default; otherwise use "local"
    crew = tar_resources_crew(controller = ifelse(hpc, "hpc_small", "local"))
  ),
  # It should be safe to assume that all workers have read/write access to the
  # _targets/ directory. These settings should speed things up.
  storage = "worker",
  retrieval = "worker"
)
# Run the R scripts in the R/ folder with your custom functions:
tar_source()
# tar_source("other_functions.R") # Source other scripts as needed.
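# do_stuff() is expected to live in R/ and be picked up by tar_source() above.
# If you haven't written it yet, the fallback below is only a sketch of the kind
# of long-running helper the plan assumes: it sleeps briefly and returns the
# data unchanged. Replace it with your real function in R/.
if (!exists("do_stuff")) {
  do_stuff <- function(data) {
    Sys.sleep(60) # stand-in for an expensive computation
    data
  }
}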
# Replace the target list below with your own:
tar_plan(
  tar_target(
    data_raw,
    tibble(x = rnorm(100), y = rnorm(100), z = rnorm(100))
  ),
  # this just simulates a long-running step
  tar_target(
    data,
    do_stuff(data_raw)
  ),
  tar_target(
    model1,
    lm(y ~ x, data = data)
  ),
  # these three models should run in parallel as three separate SLURM jobs
  tar_target(
    model2,
    lm(y ~ x + z, data = data)
  ),
  tar_target(
    model3,
    lm(y ~ x * z, data = data),
    # for this target only, use a worker with more cores and RAM
    resources = tar_resources(
      crew = tar_resources_crew(controller = ifelse(hpc, "hpc_large", "local"))
    )
  ),
  tar_target(
    model_compare,
    AIC(model1, model2, model3)
  )
)
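# Typical usage, assuming you run from the project root: targets::tar_make()
# builds the pipeline, targets::tar_visnetwork() draws the dependency graph, and
# targets::tar_read(model_compare) retrieves the final AIC comparison once the
# pipeline has finished.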