This repository has been archived by the owner on Aug 25, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 8
/
bedrock.hcl.tmpl
246 lines (234 loc) · 6.75 KB
/
bedrock.hcl.tmpl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
// Refer to https://docs.basis-ai.com/getting-started/writing-files/bedrock.hcl for more details.
version = "1.0"
/*
Train stanza
Comprises the following:
- [required] step: training steps to be run. Multiple steps are allowed but must have different names
- [optional] parameters: environment variables used by the script. They can be overwritten when you create a run.
- [optional] secrets: the names of the secrets necessary to run the script successfully
Step stanza
Comprises the following:
- [required] image: the base Docker image that the script will run in
- [optional] install: the command to install any other packages not covered in the image
- [required] script: the command that calls the script
- [optional] resources: the computing resources to be allocated to this run step
- [optional] retry: retry strategy to be used for this run step on failure
- [optional] depends_on: a list of names of steps that this run step depends on
*/
train {
// We declare a step with a step name. For example, this step is named as "preprocess".
// A step's name must be unique.
step "preprocess" {
// Same as spark.kubernetes.container.image
image = "quay.io/basisai/workload-standard:v0.3.4"
install = [
"pip3 install --upgrade pip",
"pip3 install -r requirements.txt",
]
// As we are using Spark, "script" is written in the manner shown below.
// If Spark is not required, it is just simply:
// script = [{sh = ["python3 train.py"]}]
script = [
{
spark-submit {
script = "preprocess.py"
// to be passed in as --conf key=value
conf {
"spark.executor.instances" = "2"
"spark.executor.memory" = "4g"
"spark.executor.cores" = "2"
"spark.sql.parquet.compression.codec" = "gzip"
}
// to be passed in as --key=value
settings {
}
}
}
]
resources {
// Same as spark.driver.cores
cpu = "0.5"
// Same as spark.driver.memory
memory = "1G"
}
retry {
// Limit is the maximum number of attempts when retrying a step (excludes initial attempt)
limit = "1"
}
}
// Second stage: derives features from the preprocessed output.
// depends_on (below) ensures it only starts after "preprocess" succeeds.
step "generate_features" {
image = "quay.io/basisai/workload-standard:v0.3.4"
install = [
"pip3 install --upgrade pip",
"pip3 install -r requirements.txt",
]
script = [
{
spark-submit {
script = "generate_features.py"
conf {
"spark.executor.instances" = "2"
"spark.executor.memory" = "4g"
"spark.executor.cores" = "2"
"spark.sql.parquet.compression.codec" = "gzip"
}
}
}
]
resources {
cpu = "0.5"
memory = "1G"
}
retry {
// No retries for this step: a single failed attempt fails the run.
limit = "0"
}
depends_on = ["preprocess"]
}
// Final stage: trains the model. Runs plain Python (no Spark), so a
// simple "sh" script is used instead of spark-submit.
step "train" {
image = "python:3.7"
install = [
"pip3 install --upgrade pip",
"pip3 install -r requirements.txt",
]
script = [{ sh = ["python3 train.py"] }]
resources {
cpu = "0.5"
memory = "1G"
// gpu = "1" // uncomment this in order to use GPU. Only integer values are allowed.
}
retry {
limit = "3"
}
depends_on = ["generate_features"]
}
// Environment variables made available to every step in this stanza.
// They can be overridden when a run is created.
parameters {
// Input datasets read from S3 (s3a:// scheme, i.e. via the Hadoop S3A connector).
RAW_SUBSCRIBERS_DATA = "s3a://bedrock-sample/churn_data/subscribers.gz.parquet"
RAW_CALLS_DATA = "s3a://bedrock-sample/churn_data/all_calls.gz.parquet"
// Bucket for intermediate artifacts; the two paths below are relative to it —
// presumably joined by the scripts themselves (verify against preprocess.py).
TEMP_DATA_BUCKET = "s3a://bdrk-sandbox-aws-data/"
PREPROCESSED_DATA = "churn_data/preprocessed"
FEATURES_DATA = "churn_data/features.csv"
// Model hyperparameters — presumably consumed by train.py (LR = learning rate,
// NUM_LEAVES / N_ESTIMATORS suggest a LightGBM model); confirm against the script.
LR = "0.05"
NUM_LEAVES = "10"
N_ESTIMATORS = "100"
OUTPUT_MODEL_NAME = "lgb_model.pkl"
}
// only provide the NAMES of the secrets here, NOT the secret values.
// you will enter the secret values from Bedrock web UI.
/*
secrets = [
"SECRET_KEY_1",
"SECRET_KEY_2"
]
*/
}
/*
Batch score stanza
Similar in style as Train stanza
*/
// Batch scoring pipeline: same preprocess -> generate_features chain as the
// train stanza, followed by a scoring step instead of a training step.
batch_score {
step "preprocess" {
image = "quay.io/basisai/workload-standard:v0.3.4"
install = [
"pip3 install --upgrade pip",
"pip3 install -r requirements.txt",
]
script = [
{
spark-submit {
script = "preprocess.py"
// passed in as --conf key=value
conf {
"spark.executor.instances" = "2"
"spark.executor.memory" = "4g"
"spark.executor.cores" = "2"
"spark.sql.parquet.compression.codec" = "gzip"
}
}
}
]
resources {
cpu = "0.5"
memory = "1G"
}
retry {
// Maximum number of retry attempts on failure (excludes the initial attempt).
limit = "3"
}
}
step "generate_features" {
image = "quay.io/basisai/workload-standard:v0.3.4"
install = [
"pip3 install --upgrade pip",
"pip3 install -r requirements.txt",
]
script = [
{
spark-submit {
script = "generate_features.py"
conf {
"spark.executor.instances" = "2"
"spark.executor.memory" = "4g"
"spark.executor.cores" = "2"
"spark.sql.parquet.compression.codec" = "gzip"
}
}
}
]
resources {
cpu = "0.5"
memory = "1G"
}
retry {
limit = "1"
}
depends_on = ["preprocess"]
}
// FIX: the step label was previously the bare identifier `batch_score`;
// HCL block labels must be quoted strings, and every other step in this
// file quotes its label — quoted here for validity and consistency.
step "batch_score" {
image = "python:3.7"
install = [
"pip3 install --upgrade pip",
"pip3 install -r requirements.txt",
]
script = [{ sh = ["python3 batch_score.py"] }]
resources {
cpu = "0.5"
memory = "1G"
}
retry {
limit = "3"
}
depends_on = ["generate_features"]
}
// Environment variables available to all steps above; can be overridden per run.
parameters {
RAW_SUBSCRIBERS_DATA = "s3a://bedrock-sample/churn_data/subscribers.gz.parquet"
RAW_CALLS_DATA = "s3a://bedrock-sample/churn_data/all_calls.gz.parquet"
TEMP_DATA_BUCKET = "s3a://bdrk-sandbox-aws-data/"
PREPROCESSED_DATA = "churn_data/preprocessed"
FEATURES_DATA = "churn_data/features.csv"
// Output location for per-subscriber scores — presumably written by
// batch_score.py relative to TEMP_DATA_BUCKET; confirm against the script.
SUBSCRIBER_SCORE_DATA = "churn_data/subscriber_score.csv"
OUTPUT_MODEL_NAME = "lgb_model.pkl"
}
}
/*
Serve stanza for HTTP serving
Only comprises the following:
- [required] image: the base Docker image that the script will run in
- [optional] install: the command to install any other packages not covered in the image
- [required] script: the command that calls the script
*/
serve {
image = "python:3.7"
// Serving has its own requirements file, separate from the training steps.
install = [
"pip3 install --upgrade pip",
"pip3 install -r requirements-serve.txt",
]
// Serves the WSGI app `app` from serve_http.py via gunicorn.
// BEDROCK_SERVER_PORT is expanded at runtime by the shell; if the platform
// does not set it, the port falls back to 8080.
script = [
{
sh = [
"gunicorn --bind=:${BEDROCK_SERVER_PORT:-8080} --worker-class=gthread --workers=${WORKERS} --timeout=300 --preload serve_http:app"
]
}
]
parameters {
// Number of gunicorn worker processes (gthread worker class).
WORKERS = "2"
// NOTE(review): presumably enables prometheus_client multiprocess mode
// inside serve_http.py — confirm against that script.
PROMETHEUS_MULTIPROC_DIR = "/tmp"
}
}