-
Notifications
You must be signed in to change notification settings - Fork 3
/
coderdata.yaml
executable file
·283 lines (281 loc) · 7.65 KB
/
coderdata.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
# LinkML schema header for coderdata: schema identity, CURIE prefixes,
# the imported type library, and schema-wide defaults.
id: http://w3id.org/linkml/examples/coderdata
name: coderdata
prefixes:
  # Prefix URIs end in "/" because CURIE expansion is plain concatenation:
  # coderdata:Gene must expand to .../coderdata/Gene, not .../coderdataGene.
  linkml: https://w3id.org/linkml/
  coderdata: https://w3id.org/linkml/examples/coderdata/
  schema: http://schema.org/
imports:
  - linkml:types
# Slots and attributes that do not declare a range are treated as strings.
default_range: string
# Elements without an explicit URI are placed under the coderdata prefix.
default_prefix: coderdata
# Slots reused by more than one class; a class opts in by naming the slot in
# its own `slots:` list.
slots:
  entrez_id:
    description: Gene id according to NCBI
    range: integer
  # NOTE(review): improve_sample_id and improve_drug_id are both declared as
  # identifiers, and some classes list both of them. Confirm the LinkML
  # tooling in use accepts two identifier slots on a single class.
  improve_sample_id:
    description: Unique sample identifier for this dataset, generated by this package
    identifier: true
    slot_uri: schema:identifier
    range: integer
  improve_drug_id:
    # No range is given here, so the schema-wide default_range applies.
    description: Unique identifier for this dataset, one for each smile string.
    identifier: true
    slot_uri: schema:identifier
  time:
    range: float
    description: Time at which measurement was administered, unit below.
  time_unit:
    description: Unit of time in which drug treatment was measured, e.g. minutes, hours, weeks.
  source:
    description: Source of data generated.
  study:
    description: Study information identifying where/when data was collected.
  other_id:
    description: other identifier
    recommended: true
    # NOTE(review): linkml:Any is used directly as a range; verify that the
    # generators in use resolve it without a locally declared Any class.
    range: linkml:Any
  other_id_source:
    description: source of other identifier
# Record types of the schema. Each class combines shared slots (declared under
# the top-level `slots:` key) with attributes that are local to that class.
classes:
  Gene:
    description: All genes used in coderdata
    slots:
      - entrez_id
      - other_id
      - other_id_source
    attributes:
      gene_symbol:
        required: true
        description: HGNC Gene symbol or synonym

  Sample:
    description: Unique samples identified for each experiment, used to cross reference omics data to drug data.
    slots:
      - improve_sample_id
      - other_id
      - other_id_source
    attributes:
      common_name:
        # NOTE(review): the description says "drug" although this attribute
        # lives on Sample; confirm the intended wording.
        description: name for drug
        range: linkml:Any
      cancer_type:
        description: Type of cancer
      other_names:
        description: Other name for sample
        range: linkml:Any
      species:
        description: Species
      model_type:
        description: Type of model, must be confined to our existing set of definitions
        range: Models

  Drug:
    description: List of chemicals/drugs used in the data package. Each identifier corresponds to a distinct structure.
    slots:
      - improve_drug_id
    attributes:
      chem_name:
        description: Name of drug
        range: linkml:Any
      canSMILES:
        description: Canonical SMILE string
      isoSMILES:
        description: Isomeric SMILE string
      InChIKey:
        description: InChIKey
      formula:
        description: Chemical formula
      weight:
        description: Molecular weight
        range: linkml:Any
      pubchem_id:
        description: PubChem Identifier for this drug, can be many.
        # `integer` is the built-in LinkML type name; `int` is not defined.
        range: integer

  Drug Descriptor:
    description: Computational summary of drug chemical properties
    slots:
      - improve_drug_id
    attributes:
      structural_descriptor:
        description: string name describing structural descriptor
      descriptor_value:
        range: linkml:Any
        description: value representing descriptor value

  Transcriptomics:
    description: Transcripts per million measured for each gene and sample at baseline.
    slots:
      - entrez_id
      - improve_sample_id
      - source
      - study
    attributes:
      transcriptomics:
        range: float
        description: Transcripts per million of sample at baseline.

  Proteomics:
    slots:
      - entrez_id
      - improve_sample_id
      - source
      - study
    attributes:
      proteomics:
        range: float
        description: Log-normalized log ratio of proteomics measurements of sample at baseline.

  Copy Number:
    slots:
      - entrez_id
      - improve_sample_id
      - source
      - study
    attributes:
      copy_number:
        range: float
        description: Copy number measurements of sample at baseline
      copy_call:
        range: CopyCall
        description: Categorical variable defining copy number measurement.

  Mutations:
    slots:
      - entrez_id
      - improve_sample_id
      - source
      - study
    attributes:
      mutation:
        description: Description of the mutation, when available.
      variant_classification:
        description: Classification of the variant
        range: Variant

  Experiments:
    slots:
      - improve_drug_id
      - improve_sample_id
      - source
      - study
      - time
      - time_unit
    attributes:
      dose_response_metric:
        # Points at the ResponseMetric enum declared under `enums:`; the
        # previously referenced CurveMetric is not defined in this schema.
        range: ResponseMetric
        description: Metric by which dose response value is measured
      dose_response_value:
        description: Value of metric
        range: linkml:Any

  Perturbations:
    slots:
      - entrez_id
      - improve_sample_id
      - time
      - time_unit
      - source
      - study
    attributes:
      data_value:
        range: float
        description: Response value of the gene
      data_type:
        description: The value represented in the data value
      perturbation:
        description: Drug or gene id that was perturbed
        range: integer
      perturbation_type:
        description: The perturbagen that was used
        range: CellPerturbation

  Combinations:
    slots:
      - improve_sample_id
      - source
      - study
    attributes:
      # NOTE(review): this key is named drug_drug_2 but is described as the
      # first drug; it presumably should be improve_drug_1. The name is kept
      # unchanged so existing data columns still match; confirm and rename.
      drug_drug_2:
        description: improve_drug_id of first drug
      improve_drug_2:
        description: improve_drug_id of second drug
      drug_combination_metric:
        description: metric calculated for synergy, or other metric of two drugs
      drug_combination_value:
        description: value of metric for synergy or combination
# Controlled vocabularies used as ranges by class attributes.
enums:
  # Dose-response and treatment-response metrics.
  ResponseMetric:
    permissible_values:
      fit_auc:
        description: Area under the fitted curve
      fit_ic50:
        description: Concentration at which inhibitor binding is reduced by half
      fit_ec50:
        description: Concentration required to have half cells with adverse effect
      fit_ec50se:
        description: Standard error in EC50 calculation
      fit_einf:
        description: Fraction of cells not susceptible to the drug
      fit_hs:
        description: Hill slope binding cooperativity
      fit_r2:
        description: R2 value between fit and actual data
      aac:
        description: Area above curve measured by integrating the data points
      auc:
        description: Area under the curve measured by integrating data points
      dss:
        description: Drug sensitivity score
      mrecist:
        description: For PDX data this value should be either Progressive Disease, Stable Disease, Partial Response or Complete Response.
      gc_auc:
        description: Area under tumor growth curve
      efs:
        description: event free survival
      os:
        description: overall survival
      tgi:
        description: tumor growth inhibition
      kulgap:
        description: kl divergence based metric

  # Kind of perturbagen applied in a perturbation record.
  CellPerturbation:
    permissible_values:
      gene_ko:
        description: Gene loss via CRISPR or other screen
      drug:
        description: Drug treatment

  # Allowed sample model types. The values below carry no metadata, so each
  # key is intentionally left without a body.
  Models:
    permissible_values:
      cell line:
      organoid:
      tumor:
      patient derived xenograft:
      ex vivo:

  # Allowed variant classifications for mutation records.
  Variant:
    permissible_values:
      3'UTR:
      3'Flank:
      5'Flank:
      5'UTR:
      Undetermined:
      De_novo_Start_InFrame:
      De_novo_Start_OutOfFrame:
      Frame_Shift_Del:
      Frame_Shift_Ins:
      Frameshift_Variant:
      Protein_Altering_Variant:
      IGR:
      In_Frame_Del:
      In_Frame_Ins:
      Intron:
      Missense_Mutation:
      Nonsense_Mutation:
      Nonstop_Mutation:
      RNA:
      Start_Codon_SNP:
      Start_Codon_Del:
      Start_Codon_Ins:
      Stop_Codon_Del:
      Stop_Codon_Ins:
      Silent:
      Splice_Site:
      Splice_Region:
      Translation_Start_Site:

  # Categorical copy-number calls.
  CopyCall:
    permissible_values:
      amp:
      gain:
      diploid:
      het loss:
      deep del: