-
Notifications
You must be signed in to change notification settings - Fork 0
/
Pulumi.cape-cod-dev.yaml
283 lines (282 loc) · 14.3 KB
/
Pulumi.cape-cod-dev.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
# Pulumi stack settings for the cape-cod-dev stack.
# NOTE(review): encryptionsalt is presumably written by the Pulumi CLI for
# this stack's secrets handling and should not be edited by hand -- confirm.
encryptionsalt: v1:PZ8pzBjNSVk=:v1:SGEZ1sCsg7aCAu5i:Ntz5ud8IjMsY1H5RLI/n6zmuuIK9sQ==
config:
# This block is for meta about the deployment itself that may be reused in
# later config settings. It is anchored as `depmet` and aliased by the api
# stage settings (`meta: *depmet`) in the private swimlane.
deployment:meta: &depmet
# stage-suffix is used anywhere we need the deployment stage name and
# should be values like "dev", "prod", etc. There is no exact
# enumeration of values yet.
stage-suffix: "dev"
# the az values used by the vpn subnets (us-east-2b, us-east-2c) belong to
# this region, so keep them in sync if the region changes
aws:region: us-east-2
# Registry of the Glue ETL scripts known to this deployment. Each entry has:
# - name: a short identifier for the script
# - key: the script's key; these values match the `script` values
# referenced by the etl pipelines under cape-cod:datalakehouse
# - srcpth: the path of the script file in this repo
# NOTE(review): `key` is presumably the object key the file at `srcpth` is
# uploaded under -- confirm against the deployment code
cape-cod:meta:
glue:
etl:
- name: etl-gphl-cre
key: glue/etl/etl_gphl_cre_alert.py
srcpth: ./assets/etl/etl_gphl_cre_alert.py
- name: etl-tnl
key: glue/etl/etl_tnl_alert.py
srcpth: ./assets/etl/etl_tnl_alert.py
- name: etl-fastx
key: glue/etl/etl_fasta_fastq.py
srcpth: ./assets/etl/etl_fasta_fastq.py
- name: etl-gphl-sequencing
key: glue/etl/etl_gphl_sequencing_alert.py
srcpth: ./assets/etl/etl_gphl_sequencing_alert.py
# TODO: ISSUE #144 this is for the initial bactopia results
# handling. it may not be best to have here long term, and
# we don't know yet how we're managing these things. so for
# now it's here (we also need to think about how we handle
# pipelines that may have different etl needs for
# different versions)
- name: etl-bactopia-results
key: glue/etl/etl_bactopia_results.py
srcpth: ./assets/etl/etl_bactopia_results.py
# Swimlane definitions. Only the `private` swimlane is configured here.
cape-cod:swimlanes:
private:
# This is the private domain that will be setup in the cloud
# provider private VPC.
# At this time, this does not need to be setup with a domain
# registrar unless it is also the domain used in a public facing
# resource. The domain will need to be able to be used for creation
# of TLS cert/key pairs though (in the development case, these are
# self-signed and in all cases need to be managed outside this
# repo).
domain: cape-dev.org
# NOTE: at this time we support a single (wildcard) cert per
# swimlane for non-vpn tls/ssl (vpn has its own cert). this
# may change in the future
tls:
# directory holding the files named below; the path is under
# assets-untracked, consistent with the note above that the
# cert/key pairs are managed outside this repo
dir: "./assets-untracked/tls/private-swimlane"
ca-cert: "ca.crt"
# if not specified, the key and cert are expected to be
# named following the pattern {fqdn}.[key|crt] where
# `{fqdn}` is the value of the domain key provided above
server-key: "*.cape-dev.org.key"
server-cert: "*.cape-dev.org.crt"
# address range for the swimlane; every subnet cidr-block defined
# below (10.0.1.0/24 through 10.0.4.0/24) falls inside this range
cidr-block: 10.0.0.0/16
public-subnet:
# public subnet is special and there will be one (for now) per
# swimlane (vpc), the main purpose is currently for a NAT to
# live in to allow egress to the internet for the instances in
# private subnets
cidr-block: 10.0.1.0/24
# each private subnet entry has a name, a cidr-block, a list of routes
# and, for the vpn subnets, an az
private-subnets:
- name: compute
cidr-block: 10.0.2.0/24
# NOTE(review): no az is given for this subnet (vpn and vpn2 set
# one) -- presumably a default is chosen elsewhere; confirm
# routes is a list of route targets for this subnet.
# NOTE(review): an earlier version of this comment described a
# boolean ("if True"); with the list form, a "public" entry
# presumably sends the subnet's outgoing traffic to the NAT
# gateway for egress to the internet -- confirm
routes:
- "public"
- name: vpn
cidr-block: 10.0.3.0/24
az: "us-east-2b"
routes:
- "public"
# TODO: ISSUE #118
# We really don't want this kind of name coupling for
# redundancy of subnets (or anything else). we should
# change how these are specified so redundant subnets are
# defined under the same blocks. This is just being done
# for quick demo turn around
- name: vpn2
cidr-block: 10.0.4.0/24
az: "us-east-2c"
routes:
- "public"
# NOTE: the apis section here will likely need to be re-worked as we
# move the api to a real host name, add certs, and all the
# things. for now just trying to get the stage name
# configurable from the root deployment:meta section
api:
# all apis will go in the same subdomain, and will be
# differentiated based on the path component of each individual
# api in the list below. e.g. for a domain of cape-dev.org and a
# subdomain value of "api", all apis will be rooted at
# api.cape-dev.org. for an api named `api1` and a stage-name of
# `dev`, that specific api can be found at
# api.cape-dev.org/api1-dev
subdomain: "api"
# this item needs to exist as-is. we have a common stage suffix
# used for all apis in the deployment at present, and this is
# how we access it (the alias below resolves to the
# deployment:meta block, i.e. stage-suffix)
stage:
meta: *depmet
apis:
# Every element in this object will specify an individual
# api to be deployed. Where the key is the api name and
# each item should have:
# - desc: A description for the api. Will be used in
# description tags.
# - spec_file: An OpenAPI 3.0.1 YAML specification. This
# should include *no* AWS id's, names,
# account info, etc
# - env_vars: a list of swimlane-exposed env vars that the
# API requires access to. the env vars will be
# passed into the environment of the lambda
# and basic permissions will be given to the
# lambda to access the resources referenced by
# the env var.
# - handlers: a list of objects containing
# - id:
# the id in the spec file that will be replaced
# with the arn of the created lambda function
# - name:
# a short name that will be used in resource
# creation (naming of the resources)
# - code:
# the path in the repo for the lambda that
# implements the endpoint
# - funct_args:
# arguments to pass *as is* to the lambda
# function constructor. the keys used need to map
# to actual argument names of the pulumi lambda
# function constructor. additionally, most args
# will be ignored (e.g. environment, role) as
# they are dynamically injected in code or not
# used currently. If not provided, the handler
# will default to "index.index_handler" and the
# runtime will default to "python3.10". The
# following arguments are supported:
# - architectures (list, defaults to ["x86_64"])
# - description
# - handler (defaults to "index.index_handler")
# - memory_size (MB, defaults to 128)
# - runtime (defaults to "python3.10")
# - timeout (seconds, defaults to 3)
dap:
desc: "Data Analysis Pipeline API"
spec_file: "assets/api/data-analysis-pipeline/dap-openapi-301.yaml.j2"
env_vars:
- "DAP_QUEUE_NAME"
- "DAP_REG_DDB_TABLE"
# none of the handlers below set funct_args, so the defaults
# documented above apply to each of them
handlers:
- id: "list_daps_handler"
name: "lsdaps"
code: "assets/api/data-analysis-pipeline/handlers/list_daps.py"
- id: "list_dap_executors_handler"
name: "lsexec"
code: "assets/api/data-analysis-pipeline/handlers/list_dap_executors.py"
- id: "submit_dap_run_handler"
name: "sbmtdap"
code: "assets/api/data-analysis-pipeline/handlers/submit_dap_run.py"
# static apps are deployed to s3 as html/js/css bundles and are
# exposed through an application load balancer. these may hit API
# endpoints (assuming the required permissions/roles are available),
# but have no server side functions. they are served as-is
# TODO: ISSUE #128
# TODO: ISSUE #166
static-apps:
- name: "dap-ui"
# host name for this app; it sits under the swimlane domain
# (cape-dev.org)
fqdn: "analysis-pipelines.cape-dev.org"
# dir is the root directory in the repo for this app's
# files.
# TODO: long-term we will not want to have these apps in the
# repo, but rather follow the pattern we use with ETL
# scripts using separate repos
dir: "./assets/web/static/dap-ui"
# VPN settings for the swimlane: client address pool, transport protocol
# and the vpn-specific TLS material
vpn:
# This cidr-block is where vpn client ips will be allocated
# from. This is different than the cidr block of the vpn subnet
# itself. This CIDR block cannot overlap with the VPC nor with
# the subnet being associated with the VPN endpoint.
# Additionally it must be at least a /22 and no more than /12.
# More here:
# https://docs.aws.amazon.com/vpn/latest/clientvpn-admin/scaling-considerations.html
# If not specified, this will default to "10.1.0.0/22"
cidr-block: "10.1.0.0/22"
# valid values are "tcp" and "udp". if not specified this will
# default to "udp"
transport-proto: "udp"
# the vpn has its own cert, separate from the swimlane wildcard
# cert; the file names below are looked up under `dir`
# NOTE(review): same key layout as the swimlane tls block --
# presumably handled by the same code; confirm
tls:
dir: ./assets-untracked/tls/vpn
ca-cert: ca.crt
server-key: server.key
server-cert: server.crt
# compute environments available in this swimlane
compute:
environments:
- name: analysis
# an AMI ID to use for each EC2 instance
image: ami-0cfe23bad78a802ea
# a list of subnets that ec2 instances in the compute
# environments live on (by name; `compute` is one of the
# private-subnets defined above)
subnets:
- compute
resources:
# a list of instance types to be able to request
# NOTE(review): c4.8xlarge has more vCPUs (36 per the AWS
# instance specs) than max_vcpus below allows -- confirm
# whether listing it is intended
instance_types:
[
"c4.large",
"c4.xlarge",
"c4.2xlarge",
"c4.4xlarge",
"c4.8xlarge",
]
# the maximum number of vCPUs to have in the environment
max_vcpus: 16
# (optional) the desired number of vCPUs to have in the environment
# desired_vcpus: 8
# (optional) the minimum number of vCPUs to have in the environment
# min_vcpus: 8
# Data lakehouse layout: a list of tributaries, each with raw/clean buckets
# (and their crawlers) plus the etl pipelines that process its data.
cape-cod:datalakehouse:
# NOTE: unless specified otherwise in here, all crawlers will run at
# 0200 daily
# NOTE(review): the time zone for that schedule is not stated here
tributaries:
- name: hai
# the name/crawler values below are left empty.
# NOTE(review): presumably the deployment code fills in defaults for
# empty values -- confirm
buckets:
raw:
name:
crawler:
clean:
name:
crawler:
excludes:
classifiers:
- cape-csv-standard-classifier
pipelines:
data:
# each etl entry has:
# - name: a short identifier for the pipeline
# - script: matches a `key` registered under cape-cod:meta glue etl
# - prefix / suffixes: NOTE(review): presumably select which objects
# the job handles (by key prefix and file extension) -- confirm
# - pymodules: version-pinned python packages; NOTE(review):
# presumably installed for the job at run time -- confirm
etl:
- name: tnl
script: glue/etl/etl_tnl_alert.py
prefix: tnl
suffixes:
- xlsx
pymodules:
- openpyxl==3.1.2
- name: gphl-cre
script: glue/etl/etl_gphl_cre_alert.py
prefix: gphl-cre
suffixes:
- docx
pymodules:
- python-docx==1.1.2
- name: gphl-sequencing
script: glue/etl/etl_gphl_sequencing_alert.py
prefix: gphl-sequencing
suffixes:
- pdf
pymodules:
- tabula-py==2.9.3
- pypdf==4.3.1
- name: genomics
# as with hai, the bucket and crawler values are left empty here
buckets:
raw:
name:
crawler:
clean:
name:
crawler:
classifiers:
pipelines:
data:
etl:
- name: fastx
script: glue/etl/etl_fasta_fastq.py
prefix: fastx
suffixes:
- gz
- fasta
- fastq
pymodules:
- pyfastx==2.1.0
max_concurrent_runs: 5 # optional, 5 is the default