-
Notifications
You must be signed in to change notification settings - Fork 0
/
BetaSheetGenerator.java
366 lines (339 loc) · 17.1 KB
/
BetaSheetGenerator.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.atomic.*;
import com.google.common.collect.*;
import java.io.*;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.geometry.euclidean.threed.*;
import org.jgrapht.*;
import org.jgrapht.graph.*;
import org.jgrapht.alg.*;
/**
* This class takes a peptide of the form [Gly]n - D-Pro - Gly - [Gly]n and creates beta sheet conformations.
*
* The procedure is:
*
* 0. Create a peptide of the desired length.
*
* 1. Create a SheetUnit. Every iteration, draw a random set of backbone angles for
* one of the residues. Use X-ray-like angles for the hairpin and sheet-like (-120,+120) angles for the arms.
*
* 2. Check for very close clashes. If they are not present, minimize on the OPLS forcefield.
*
* 3. Count up how many hydrogen bonds are present. If this is an n-mer, we expect floor(n/2) hydrogen bonds. If we
* don't see at least floor(n/2)-1 hydrogen bonds, throw out the structure.
*
* 4. Repeat steps 1-4 a certain number of times, discarding duplicates. Return the lowest energy results.
*
* These steps run in parallel. In practice, duplicate removal is accomplished by using a Map where the keys are
* BackboneFingerprints. These are basically lists of the backbone angles, where the angles are rounded to a certain
* granularity.
*/
public class BetaSheetGenerator
{
/** not instantiable */
private BetaSheetGenerator()
{
throw new IllegalArgumentException("cannot be instantiated!");
}
/**
* Creates beta sheets using all local cores.
* @param armLength how many residues should go in each arm (this does not include the hairpin)
* @param maxIterations how many Monte Carlo iterations to do per simulation
* @param desiredNumberOfResults if this number of unique results is achieved (in aggregate), all runs will abort
* @param deltaAlpha the amount to decrease the temperature by on every Monte Carlo iteration
* @return the lowest energy beta sheet conformations
*/
public static List<Peptide> generateSheets(int armLength, int maxIterations, int desiredNumberOfResults, double deltaAlpha)
{
// check the input
if ( armLength < 2 )
throw new IllegalArgumentException("must have at least two residues in each arm");
if ( maxIterations < 1 )
throw new IllegalArgumentException("you must do at least one iteration");
if ( desiredNumberOfResults < 1 )
throw new IllegalArgumentException("you must want more than one result");
// create the seed peptide
List<String> arm = new ArrayList<>(armLength);
for (int i=0; i < armLength; i++)
arm.add("standard_glycine");
List<String> peptideStringList = new ArrayList<>(armLength*2 + 2);
for (String s : arm)
peptideStringList.add(s);
peptideStringList.add("d_proline");
peptideStringList.add("gly");
for (String s : arm)
peptideStringList.add(s);
List<ProtoAminoAcid> sequence = ProtoAminoAcidDatabase.getSpecificSequence(peptideStringList);
Peptide seedPeptide = PeptideFactory.createPeptide(sequence);
seedPeptide = PeptideFactory.setHairpinAngles(seedPeptide);
// queue the calculations
Map<BackboneFingerprint,Peptide> resultMap = new ConcurrentHashMap<>();
List<Future<Result>> futures = new ArrayList<>();
for (int i=0; i < Settings.NUMBER_OF_THREADS; i++)
//for (int i=0; i < 1; i++)
{
SheetUnit job = new SheetUnit(seedPeptide, resultMap, armLength, maxIterations, desiredNumberOfResults, deltaAlpha);
Future<Result> f = GeneralThreadService.submit(job);
futures.add(f);
}
// wait for them to finish
GeneralThreadService.silentWaitForFutures(futures);
// return the results
return new ArrayList<Peptide>(resultMap.values());
}
/**
* This runs a Monte Carlo simulation that tries to find low energy beta sheet conformations of a hairpin.
*/
private static class SheetUnit implements WorkUnit
{
public final Peptide seedPeptide;
public final Map<BackboneFingerprint,Peptide> resultMap;
public final int armLength;
public final int maxIterations;
public final int desiredNumberOfResults;
public final double deltaAlpha;
public final int ID;
public static final AtomicInteger ID_GENERATOR = new AtomicInteger();
public SheetUnit(Peptide seedPeptide, Map<BackboneFingerprint,Peptide> resultMap, int armLength,
int maxIterations, int desiredNumberOfResults, double deltaAlpha)
{
this.seedPeptide = seedPeptide;
this.resultMap = resultMap;
this.armLength = armLength;
this.maxIterations = maxIterations;
this.desiredNumberOfResults = desiredNumberOfResults;
this.deltaAlpha = deltaAlpha;
ID = ID_GENERATOR.incrementAndGet();
}
public Result call()
{
// get some information before we start
int numberOfResidues = seedPeptide.sequence.size();
List<Integer> hairpinIndices = ImmutableList.of(numberOfResidues/2 - 1, numberOfResidues/2);
int prolineResidueIndex = numberOfResidues/2 - 1;
int glycineResidueIndex = prolineResidueIndex + 1;
Peptide currentPeptide = seedPeptide;
Peptide lastPeptide = seedPeptide;
NormalDistribution prolinePhiDistribution = new NormalDistribution(53.0, 5.0);
NormalDistribution prolinePsiDistribution = new NormalDistribution(-132.0, 5.0);
NormalDistribution glycinePhiDistribution = new NormalDistribution(-96.0, 15.0);
NormalDistribution glycinePsiDistribution = new NormalDistribution(9.0, 15.0);
NormalDistribution betaNegativeDistribution = new NormalDistribution(-120.0, 15.0);
NormalDistribution betaPositiveDistribution = new NormalDistribution(120.0, 15.0);
NormalDistribution omegaDistribution = new NormalDistribution(180.0, 5.0);
// do Monte Carlo
double currentAlpha = 0.0;
for (int iteration=1; iteration <= maxIterations; iteration++)
{
// quit if we have obtained the desired number of results
if ( resultMap.size() >= desiredNumberOfResults )
{
System.out.printf("[%2d] Reached the maximum number of results.\n", ID);
break;
}
// pick a residue at random
int randomIndex = ThreadLocalRandom.current().nextInt(numberOfResidues);
// make the mutation
Peptide candidatePeptide = currentPeptide;
if ( hairpinIndices.contains(randomIndex) )
{
// this is a hairpin position
candidatePeptide = BackboneMutator.setOmega (candidatePeptide, prolineResidueIndex, angleModulus(omegaDistribution.sample()));
candidatePeptide = BackboneMutator.setPhiPsi(candidatePeptide, prolineResidueIndex,
angleModulus(prolinePhiDistribution.sample()),
angleModulus(prolinePsiDistribution.sample()));
candidatePeptide = RotamerMutator.setChis (candidatePeptide, prolineResidueIndex, ImmutableList.of(-22.0, 32.0, -29.0));
candidatePeptide = BackboneMutator.setOmega (candidatePeptide, glycineResidueIndex, angleModulus(omegaDistribution.sample()));
candidatePeptide = BackboneMutator.setPhiPsi(candidatePeptide, glycineResidueIndex,
angleModulus(glycinePhiDistribution.sample()),
angleModulus(glycinePsiDistribution.sample()));
}
else
{
// this is a normal position
candidatePeptide = BackboneMutator.setOmega (candidatePeptide, randomIndex, angleModulus(omegaDistribution.sample()));
candidatePeptide = BackboneMutator.setPhiPsi(candidatePeptide, randomIndex,
angleModulus(betaNegativeDistribution.sample()),
angleModulus(betaPositiveDistribution.sample()));
}
// OPLS minimization
//candidatePeptide = TinkerJob.minimize(candidatePeptide, Forcefield.OPLS, 100, false, true, false, false, false);
try
{
candidatePeptide = TinkerJob.minimize(candidatePeptide, Forcefield.AMOEBA, 250, false, true, false, false, false);
}
catch (Exception e)
{
System.out.printf("[%2d] %d of %d: rejected ( tinker error )\n", ID, iteration, maxIterations);
continue;
}
// check for hydrogen bonds
boolean isSheet = isSheet(candidatePeptide,1);
if ( isSheet )
{
// only add to result map if this is a sheet
BackboneFingerprint backboneFingerprint = new BackboneFingerprint(candidatePeptide);
resultMap.put(backboneFingerprint, candidatePeptide);
}
else
{
System.out.printf("[%2d] %d of %d: auto-rejected ( candidateE = %.2f, currentE = %.2f, isSheet = %b )\n", ID,
iteration, maxIterations, candidatePeptide.energyBreakdown.totalEnergy, currentPeptide.energyBreakdown.totalEnergy, isSheet );
continue;
}
// accept or reject
boolean accept = MonteCarloJob.acceptChange(currentPeptide, candidatePeptide, currentAlpha);
if ( accept )
{
currentPeptide = candidatePeptide;
System.out.printf("[%2d] %d of %d: accepted ( candidateE = %.2f, currentE = %.2f, isSheet = %b )\n", ID,
iteration, maxIterations, candidatePeptide.energyBreakdown.totalEnergy, currentPeptide.energyBreakdown.totalEnergy, isSheet );
}
else
System.out.printf("[%2d] %d of %d: rejected ( candidateE = %.2f, currentE = %.2f, isSheet = %b )\n", ID,
iteration, maxIterations, candidatePeptide.energyBreakdown.totalEnergy, currentPeptide.energyBreakdown.totalEnergy, isSheet );
}
System.out.printf("[%2d] Reached the maximum number of iterations.\n", ID);
return null;
}
}
/**
* Takes an angle and restricts it to the range [-180.0, 180.0] using the modulus.
* @param d an angle
* @return the restricted angle
*/
public static double angleModulus(double d)
{
double m = d % 360.0d;
if (m < -180.0) m = m + 360.0;
if (m > 180.0) m = m - 360.0;
return m;
}
/**
* Determines if the specified peptide is in a beta sheet conformation.
* @param candidatePeptide the peptide to check
* @param offset if the ideal number of interstrand H-bonds is n, then we ensure that there are at least n-offset bonds
* @return true if this is in a beta sheet conformation
*/
public static boolean isSheet(Peptide candidatePeptide, int offset)
{
// check for hydrogen bonds
int numberOfResidues = candidatePeptide.sequence.size();
int numberOfHydrogenBonds = 0;
for (int i=0; i < (numberOfResidues / 2)-1; i++)
{
int j = numberOfResidues-i-1;
Residue residueI = candidatePeptide.sequence.get(i);
Residue residueJ = candidatePeptide.sequence.get(j);
if ( residueJ.HN != null &&
Molecule.getDistance(residueI.O, residueJ.HN) < 2.5 && Molecule.getAngle(residueI.O, residueJ.HN, residueJ.N) > 120.0 )
numberOfHydrogenBonds++;
if ( residueI.HN != null &&
Molecule.getDistance(residueJ.O, residueI.HN) < 2.5 && Molecule.getAngle(residueJ.O, residueI.HN, residueI.N) > 120.0 )
numberOfHydrogenBonds++;
}
if ( numberOfHydrogenBonds < (numberOfResidues / 2) - 1 - offset )
return false;
return true;
}
/**
* Minimizes the specified sheets and checks them for whether they are beta sheets.
* Does not use any solvation at all.
* @param sheets the peptides to minimize
* @param iterations the maximum number of iterations to use
* @param forcefield which forcefield to use
* @return the minimized beta sheetes
*/
public static List<Peptide> minimizeSheets(List<Peptide> sheets, int iterations, Forcefield forcefield)
{
// run minimizations
List<Future<Result>> futures = new ArrayList<>(sheets.size());
for (Peptide p : sheets)
{
TinkerJob job = new TinkerJob(p, forcefield, iterations, false, false, false, false, false);
Future<Result> f = GeneralThreadService.submit(job);
futures.add(f);
}
//GeneralThreadService.silentWaitForFutures(futures);
GeneralThreadService.waitForFutures(futures);
// retrieve results
List<Peptide> returnList = new ArrayList<>();
for (Future<Result> f : futures)
{
try
{
TinkerJob.TinkerResult result = (TinkerJob.TinkerResult)f.get();
Peptide resultPeptide = result.minimizedPeptide;
//if ( isSheet(resultPeptide,0) )
returnList.add(resultPeptide);
}
catch (Exception e) {}
}
return ImmutableList.copyOf(returnList);
}
/**
* Minimizes sheets in serial.
*/
public static List<Peptide> minimizeSheetsInSerial(List<Peptide> sheets, int iterations, Forcefield forcefield)
{
List<Peptide> results = new ArrayList<>();
for (Peptide p : sheets)
{
TinkerJob job = new TinkerJob(p, forcefield, iterations, false, false, false, false, false);
try
{
Peptide resultPeptide = job.call().minimizedPeptide;
if ( isSheet(resultPeptide, 0) )
results.add(resultPeptide);
}
catch (Exception e) {}
}
return results;
}
/** Creates some beta sheets. */
public static void main(String[] args)
{
DatabaseLoader.go();
List<Peptide> sheets = generateSheets(6, 10, 10000, 0.01);
sheets = new ArrayList<>(sheets);
Collections.sort(sheets);
// remove duplicates
int maxResults = 100;
double RMSDthreshold = 2.0;
List<Peptide> results = new ArrayList<>();
results.add(sheets.get(0));
for (int i=1; i < sheets.size(); i++)
{
System.out.printf("superimposing %d of %d\r", i+1, sheets.size());
Peptide candidatePeptide = sheets.get(i);
Superposition superposition = Superposition.superimpose(candidatePeptide, results);
boolean accept = true;
System.out.println(superposition.RMSDs);
for (Double RMSD : superposition.RMSDs)
{
// reject this candidate if it's too similar to an existing peptide
if ( RMSD <= RMSDthreshold )
{
accept = false;
break;
}
}
if ( accept )
results.add(candidatePeptide);
if ( results.size() >= maxResults )
break;
}
System.out.println("\nfinished superimposing");
for ( int i=0; i < results.size(); i++ )
{
Peptide p = sheets.get(i);
GaussianInputFile f = new GaussianInputFile(p);
String filename = String.format("test_peptides/sheet_%02d.gjf", i);
f.write(filename);
System.out.printf("Sheet %02d: E = %7.2f\n", i, p.energyBreakdown.totalEnergy);
filename = String.format("test_peptides/sheet_%02d.chk", i);
p.checkpoint(filename);
}
}
}