-
Notifications
You must be signed in to change notification settings - Fork 12
/
run_gypsum_dl.py
executable file
·307 lines (266 loc) · 9.54 KB
/
run_gypsum_dl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
#!/usr/bin/env python
# Copyright 2023 Jacob D. Durrant
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Gypsum-DL 1.2.1 is a conversion script to transform smiles strings and 2D SDFs
into 3D models.
"""
def print_gypsum_citation():
    """
    Print the citation for the Gypsum-DL paper.

    This runs before the Parallelizer is initialized, so the function itself
    limits the print statement to the MPI process of rank 0. Without this
    check, in MPI mode it would print once per available cpu. If mpi4py
    cannot be used (for example, it is not installed), the citation is
    printed unconditionally.
    """

    # Always report citation information.
    citation_print = "".join(
        [
            "\nIf you use Gypsum-DL in your research, please cite:\n\n",
            "Ropp, Patrick J., Jacob O. Spiegel, Jennifer L. Walker, Harrison Green,\n",
            "Guillermo A. Morales, Katherine A. Milliken, John J. Ringe, and Jacob D. Durrant.\n",
            "(2019) Gypsum-DL: An Open-source Program for Preparing Small-molecule Libraries for \n",
            "Structure-based Virtual Screening. Journal of Cheminformatics 11:1. ",
            "\ndoi:10.1186/s13321-019-0358-3.\n",
        ]
    )

    # Only the rank lookup is guarded. Keeping print() out of the try body
    # means a failure while printing is not swallowed and retried.
    try:
        from mpi4py import MPI

        rank = MPI.COMM_WORLD.rank
    except Exception:
        # Deliberate best effort: any problem importing or querying MPI is
        # treated as "not running under MPI", so the citation is printed.
        rank = 0

    if rank == 0:
        print(citation_print)
# Print the Gypsum-DL citation as soon as the script starts, i.e. before the
# remaining imports and the command-line parsing that follow.
print_gypsum_citation()
import argparse
import copy
from gypsum_dl.Start import prepare_molecules
from gypsum_dl.Test.Tester import run_test
from gypsum_dl import Utils
# Command-line interface of Gypsum-DL. The description and epilog are printed
# verbatim (RawDescriptionHelpFormatter), so their line breaks are kept as
# written here.
PARSER = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description="""
Gypsum-DL 1.2.1, a free, open-source program for preparing 3D small-molecule
models. Beyond simply assigning atomic coordinates, Gypsum-DL accounts for
alternate ionization, tautomeric, chiral, cis/trans isomeric, and
ring-conformational forms.""",
    epilog="""
EXAMPLES OF USE:
1. Prepare a virtual library and save all 3D models to a single SDF file in the
present directory:
python run_gypsum_dl.py --source ./examples/sample_molecules.smi
2. Instead save all 3D models to a different, existing folder:
python run_gypsum_dl.py --source ./examples/sample_molecules.smi \\
--output_folder /my/folder/
3. Additionally save the models associated with each input molecule to
separate files:
python run_gypsum_dl.py --source ./examples/sample_molecules.smi \\
--output_folder /my/folder/ --separate_output_files
4. In addition to saving a 3D SDF file, also save 3D PDB files and an HTML file
with 2D structures (for debugging).
python run_gypsum_dl.py --source ./examples/sample_molecules.smi \\
--output_folder /my/folder/ --add_pdb_output --add_html_output
5. Save at most two variants per input molecule:
python run_gypsum_dl.py --source ./examples/sample_molecules.smi \\
--output_folder /my/folder/ --max_variants_per_compound 2
6. Control how Gypsum-DL ionizes the input molecules:
python run_gypsum_dl.py --source ./examples/sample_molecules.smi \\
--output_folder /my/folder/ --min_ph 12 --max_ph 14 --pka_precision 1
7. Run Gypsum-DL in serial mode (using only one processor):
python run_gypsum_dl.py --source ./examples/sample_molecules.smi \\
--job_manager serial
8. Run Gypsum-DL in multiprocessing mode, using 4 processors:
python run_gypsum_dl.py --source ./examples/sample_molecules.smi \\
--job_manager multiprocessing --num_processors 4
9. Run Gypsum-DL in mpi mode using all available processors:
mpirun -n $NTASKS python -m mpi4py run_gypsum_dl.py \\
--source ./examples/sample_molecules.smi \\
--job_manager mpi --num_processors -1
10. Gypsum-DL can also take parameters from a JSON file:
python run_gypsum_dl.py --json myparams.json
Where myparams.json might look like:
{
"source": "./examples/sample_molecules.smi",
"separate_output_files": true,
"job_manager": "multiprocessing",
"output_folder": "/my/folder/",
"add_pdb_output": true,
"add_html_output": true,
"num_processors": -1
}
""",
)

# --- Input, output, and parameter-file options ---
PARSER.add_argument(
    "--json",
    "-j",
    type=str,
    metavar="param.json",
    help=(
        "Name of a json file containing all parameters. "
        "Overrides all other arguments specified at the commandline."
    ),
)
PARSER.add_argument(
    "--source",
    "-s",
    type=str,
    metavar="input.smi",
    help="Name of the source file (e.g., input.smi). Note: support for SMI (SMILES) files is better than support for SDF files, though Gypsum-DL can handle both.",
)
PARSER.add_argument(
    "--output_folder",
    "-o",
    type=str,
    help=(
        "The path to an existing folder where the Gypsum-DL "
        "output file(s) will be saved."
    ),
)

# --- Parallelization options ---
PARSER.add_argument(
    "--job_manager",
    type=str,
    default="multiprocessing",
    choices=["mpi", "multiprocessing", "serial"],
    help=(
        "Determine what style of multiprocessing to use: mpi, "
        "multiprocessing, or serial. Serial will override the "
        "num_processors flag, forcing it to be one. MPI mode "
        "requires mpi4py 2.1.0 or higher and should be executed "
        "as: mpirun -n $NTASKS python -m mpi4py run_gypsum_dl.py "
        "...-settings..."
    ),
)
PARSER.add_argument(
    "--num_processors",
    "-p",
    type=int,
    metavar="N",
    default=1,
    help=(
        "Number of processors to use for parallel "
        "calculations."
    ),
)

# --- Sampling-depth options ---
PARSER.add_argument(
    "--max_variants_per_compound",
    "-m",
    type=int,
    metavar="V",
    help=(
        "The maximum number of variants to create per input "
        "molecule."
    ),
)
PARSER.add_argument(
    "--thoroughness",
    "-t",
    type=int,
    help=(
        "How widely to search for low-energy conformers. "
        "Larger values increase run times but can produce better "
        "results."
    ),
)

# --- Output-format options ---
PARSER.add_argument(
    "--separate_output_files",
    action="store_true",
    help=(
        "Indicates that the outputs should be split between "
        "files. If true, each output .sdf file will correspond to a "
        "single input file, but different 3D conformers will still "
        "be stored in the same file."
    ),
)
PARSER.add_argument(
    "--add_pdb_output",
    action="store_true",
    help=(
        "Indicates that the outputs should also be written in "
        "the .pdb format. Creates one PDB file for each molecular "
        "variant."
    ),
)
PARSER.add_argument(
    "--add_html_output",
    action="store_true",
    help=(
        "Indicates that the outputs should also be written in "
        "the .html format, for debugging. Attempts to open a "
        "browser for viewing."
    ),
)

# --- Ionization (pH) options ---
PARSER.add_argument(
    "--min_ph",
    metavar="MIN",
    type=float,
    help="Minimum pH to consider.",
)
PARSER.add_argument(
    "--max_ph",
    metavar="MAX",
    type=float,
    help="Maximum pH to consider.",
)
PARSER.add_argument(
    "--pka_precision",
    metavar="D",
    type=float,
    help=(
        "Size of pH substructure ranges. See Dimorphite-DL "
        "publication for details."
    ),
)

# --- Switches that skip or relax individual preparation steps ---
# NOTE(review): "ennumeration" is misspelled in two user-facing help strings
# below; the text is left unchanged here.
PARSER.add_argument(
    "--skip_optimize_geometry",
    action="store_true",
    help="Skips the optimization step.",
)
PARSER.add_argument(
    "--skip_alternate_ring_conformations",
    action="store_true",
    help=(
        "Skips the non-aromatic ring-conformation "
        "generation step."
    ),
)
PARSER.add_argument(
    "--skip_adding_hydrogen",
    action="store_true",
    help="Skips the ionization step.",
)
PARSER.add_argument(
    "--skip_making_tautomers",
    action="store_true",
    help="Skips tautomer-generation step.",
)
PARSER.add_argument(
    "--skip_enumerate_chiral_mol",
    action="store_true",
    help=(
        "Skips the ennumeration of unspecified chiral "
        "centers."
    ),
)
PARSER.add_argument(
    "--skip_enumerate_double_bonds",
    action="store_true",
    help="Skips the ennumeration of double bonds.",
)
PARSER.add_argument(
    "--let_tautomers_change_chirality",
    action="store_true",
    help=(
        "Allow tautomers that change "
        "the total number of chiral centers (see README.md for "
        "further explanation)."
    ),
)
PARSER.add_argument(
    "--use_durrant_lab_filters",
    action="store_true",
    help=(
        "Use substructure filters to "
        "remove molecular variants that, though technically "
        "possible, were judged improbable by members of the "
        "Durrant lab. See README.md for more details."
    ),
)
PARSER.add_argument(
    "--2d_output_only",
    action="store_true",
    help="Skips the generate-3D-models step.",
)

# --- Maintenance modes ---
PARSER.add_argument(
    "--cache_prerun",
    "-c",
    action="store_true",
    help="Run this before running Gypsum-DL in mpi mode.",
)
PARSER.add_argument(
    "--test",
    action="store_true",
    help="Tests Gypsum-DL to check for programming bugs.",
)
# Parse the command line into a plain dict keyed by argument name.
# NOTE(review): this section runs at import time; there is no
# `if __name__ == "__main__":` guard. Confirm that is intended, since
# multiprocessing's "spawn" start method re-imports the main module.
ARGS_DICT = vars(PARSER.parse_args())

if ARGS_DICT["test"]:
    # --test: run the bundled tests instead of processing any input.
    run_test()
elif not ARGS_DICT["cache_prerun"]:
    # Normal run. Work on a deep copy so ARGS_DICT itself stays untouched,
    # and forward only the arguments that are not None (presumably so that
    # prepare_molecules can apply its own defaults -- TODO confirm).
    INPUTS = {
        key: value
        for key, value in copy.deepcopy(ARGS_DICT).items()
        if value is not None
    }
    prepare_molecules(INPUTS)
    Utils.log("Finished Gypsum-DL")
# With --cache_prerun (and without --test) nothing further is executed here.