Skip to content

Commit

Permalink
Add a parameter to override a test and include spectra for mzML/mzXML…
Browse files Browse the repository at this point in the history
… centroided spectra

Existing code always tests the spectrum peak data for a minimum median peak distance of 50 ppm (to be considered centroid data), which always overrides the value read from an mzML or mzXML file. The parameter overrides the test result in the case that the mzML/mzXML file reports the spectrum as centroided, but the peak data failed the test.
  • Loading branch information
FarmGeek4Life committed Jan 12, 2023
1 parent 2712ab0 commit 2ee4ccb
Show file tree
Hide file tree
Showing 9 changed files with 95 additions and 14 deletions.
7 changes: 7 additions & 0 deletions docs/Changelog.html
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@ <h1 class="pagetitle">MS-GF+ ChangeLog</h1>
<a href="index.html">MS-GF+ Documentation home</a>
</p>

<p>
<b>v2023.01.12</b>
</p>
<ul>
<li>Add parameter and output messages for working with particularly dense centroided data (read from mzML or mzXML)</li>
</ul>

<p>
<b>v2022.04.18</b>
</p>
Expand Down
4 changes: 4 additions & 0 deletions docs/MSGFPlus.html
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ <h1>MS-GF+</h1>
<span class="code-keyword">[-maxMissedCleavages Count]</span> (Exclude peptides with more than this number of missed cleavages from the search; <span class="code-object">Default: -1 (no limit)</span>)

<span class="code-keyword">[-numMods Count]</span> (Maximum number of dynamic (variable) modifications per peptide; <span class="code-object">Default: 3</span>)

<span class="code-keyword">[-allowDenseCentroidedPeaks 0/1]</span> (<span class="code-object">Default: 0 (disabled)</span>; 1: (for mzML/mzXML input only) allows inclusion of spectra with high-density centroid data in the search)
MS-GF+ checks the distance between consecutive peaks in each spectrum; if the median distance is less than 50 ppm, the spectrum is treated as a profile spectrum, regardless of the centroid flag provided in the mzML or mzXML file.
This parameter allows overriding this check when the mzML/mzXML file says the spectrum is centroided.
</pre>
</div>

Expand Down
7 changes: 7 additions & 0 deletions src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ public class SearchParams {
private double chargeCarrierMass;
private int maxMissedCleavages;
private int maxNumMods;
private boolean allowDenseCentroidedPeaks;

/**
 * Creates a SearchParams instance. The constructor body is empty and assigns no fields itself.
 */
public SearchParams() {
}
Expand Down Expand Up @@ -214,6 +215,10 @@ public int getMaxMissedCleavages() {
return maxMissedCleavages;
}

/**
 * Reports whether the dense-centroided-peaks override is enabled.
 * Used by MS-GF+.
 *
 * @return the current value of the allowDenseCentroidedPeaks field
 */
public boolean getAllowDenseCentroidedPeaks() {
    return this.allowDenseCentroidedPeaks;
}

/**
* Look for # in dataLine
Expand Down Expand Up @@ -402,6 +407,8 @@ public String parse(ParamManager paramManager) {
} else if (maxMissedCleavages > -1 && enzyme.getName().equals("NoCleavage")) {
return "Cannot specify a MaxMissedCleavages when using no cleavage enzyme";
}

allowDenseCentroidedPeaks = paramManager.getAllowDenseCentroidedPeaks() == 1;

maxNumMods = paramManager.getMaxNumModsPerPeptide();
int maxNumModsCompare = aaSet.getMaxNumberOfVariableModificationsPerPeptide();
Expand Down
28 changes: 22 additions & 6 deletions src/main/java/edu/ucsd/msjava/msutil/SpecKey.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ public static ArrayList<SpecKey> getSpecKeyList(
int minCharge,
int maxCharge,
ActivationMethod activationMethod,
int minNumPeaksPerSpectrum) {
int minNumPeaksPerSpectrum,
boolean allowDenseCentroidedData) {

Iterator<Spectrum> itr = specAcc.getSpecItr();

Expand All @@ -78,7 +79,8 @@ public static ArrayList<SpecKey> getSpecKeyList(
minCharge,
maxCharge,
activationMethod,
minNumPeaksPerSpectrum);
minNumPeaksPerSpectrum,
allowDenseCentroidedData);


SpectrumParser parser = specAcc.getSpectrumParser();
Expand All @@ -101,14 +103,16 @@ public static ArrayList<SpecKey> getSpecKeyList(
int minCharge,
int maxCharge,
ActivationMethod activationMethod,
int minNumPeaksPerSpectrum) {
int minNumPeaksPerSpectrum,
boolean allowDenseCentroidedData) {

if (activationMethod == ActivationMethod.FUSION)
return getFusedSpecKeyList(itr, startSpecIndex, endSpecIndex, minCharge, maxCharge);

ArrayList<SpecKey> specKeyList = new ArrayList<SpecKey>();

int numProfileSpectra = 0;
int numDenseCentroidedSpectra = 0;
int numSpectraWithTooFewPeaks = 0;
final int MAX_INFORMATIVE_MESSAGES = 10;
int informativeMessageCount = 0;
Expand Down Expand Up @@ -176,17 +180,25 @@ public static ArrayList<SpecKey> getSpecKeyList(
}
}

if (!spec.isCentroided()) {
if (!spec.isCentroided() && !(spec.isCentroidedWithDensePeaks() && allowDenseCentroidedData)) {
String message = "Skip spectrum " + spec.getID() + " since ";
if (spec.isCentroidedWithDensePeaks()) {
message += "peaks are too dense";
numDenseCentroidedSpectra++;
} else {
message += "it is not centroided";
numProfileSpectra++;
}

if (informativeMessageCount < MAX_INFORMATIVE_MESSAGES) {
System.out.println("Skip spectrum " + spec.getID() + " since it is not centroided");
System.out.println(message);
informativeMessageCount++;
} else {
if (informativeMessageCount == MAX_INFORMATIVE_MESSAGES) {
System.out.println(" ...");
informativeMessageCount++;
}
}
numProfileSpectra++;
continue;
}

Expand All @@ -206,6 +218,10 @@ public static ArrayList<SpecKey> getSpecKeyList(

System.out.println("Ignoring " + numProfileSpectra + " profile spectra.");
System.out.println("Ignoring " + numSpectraWithTooFewPeaks + " spectra having less than " + minNumPeaksPerSpectrum + " peaks.");
if (numDenseCentroidedSpectra > 0) {
System.out.println("Ignoring " + numDenseCentroidedSpectra + " spectra marked as centroid with dense peaks (<50ppm median distance).\n" +
" Re-run search with parameter '-allowDenseCentroidedPeaks 1' to include these spectra in the search");
}

return specKeyList;
}
Expand Down
32 changes: 29 additions & 3 deletions src/main/java/edu/ucsd/msjava/msutil/Spectrum.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ public enum Polarity {
private Polarity scanPolarity = Polarity.POSITIVE;

private Boolean isCentroided = true;
// Both flags are primitive booleans: they are only ever assigned true/false, so boxing adds
// nothing and a primitive can never be null when read in a boolean expression.

// True once setIsCentroided() has been called, i.e. the centroid state was supplied from outside this class.
private boolean externalSetIsCentroided = false;
// True when the spectrum was flagged as centroided from outside this class,
// but determineIsCentroided() found the median peak spacing too small.
private boolean isCentroidedWithDensePeaks = false;

private boolean isHighPrecision = false;
// private Tolerance precursorTolerance = null;
Expand Down Expand Up @@ -256,6 +258,15 @@ public boolean isCentroided() {
return this.isCentroided;
}

/**
 * Tells whether the reader declared this spectrum centroided while the peak-density
 * test in determineIsCentroided() rejected it because the peaks are too dense.
 *
 * @return true only if setIsCentroided(true) was called and determineIsCentroided() then failed; false otherwise
 */
public boolean isCentroidedWithDensePeaks() {
    return isCentroidedWithDensePeaks;
}

/**
* Returns whether this spectrum peaks are measured with high-precision.
*
Expand Down Expand Up @@ -437,6 +448,8 @@ public void setScanPolarity(Polarity scanPolarity) {
*/
public void setIsCentroided(boolean isCentroided) {
    // This setter is used for mzML and mzXML files; record that the centroid state
    // was supplied from outside of this class.
    externalSetIsCentroided = true;
    this.isCentroided = isCentroided;
}

/**
Expand Down Expand Up @@ -489,7 +502,7 @@ public Float getIsolationWindowTargetMz() {
* Sets isCentroided by a simple testing.
*/
public void determineIsCentroided() {
this.isCentroided = true;
boolean centroidedCheckPass = true;

// if(this.size() > 100)
// {
Expand All @@ -516,8 +529,21 @@ public void determineIsCentroided() {
prevMz = curMz;
}
Collections.sort(diff);
if (diff.size() > 0 && diff.get(diff.size() / 2) < 50)
isCentroided = false;
if (diff.size() > 0 && diff.get(diff.size() / 2) < 50) {
// Check failed - the median PPM distance between peaks is less than 50 PPM
centroidedCheckPass = false;
}
}

if (centroidedCheckPass) {
this.isCentroided = true;
} else {
if (this.isCentroided && this.externalSetIsCentroided) {
// set a flag to notify the user
this.isCentroidedWithDensePeaks = true;
}

this.isCentroided = false;
}
}

Expand Down
19 changes: 19 additions & 0 deletions src/main/java/edu/ucsd/msjava/params/ParamManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ public enum ParamNameEnum {
ADD_FEATURES("addFeatures", "AddFeatures", "Include additional features in the output (enable this to post-process results with Percolator)",
"0 means Output basic scores only (Default)\n" +
"\t 1 means Output additional features"),

ALLOW_DENSE_CENTROIDED_PEAKS("allowDenseCentroidedPeaks", "AllowDenseCentroidedPeaks", "Allow centroid scans with dense peaks (Default: 0)\n" +
"\t (for mzML or mzXML files, the console output will tell you if you might want to use this)", null),

DD_DIRECTORY("dd", "DBIndexDir", "Path to the directory containing database index files", null),

Expand Down Expand Up @@ -652,6 +655,13 @@ private void addMaxNumModsParam() {
addParameter(maxNumMods);
}

/**
 * Registers the AllowDenseCentroidedPeaks option as a two-entry enum parameter.
 * The first entry (skip spectra that fail the density check) is marked as the default.
 */
private void addAllowDenseCentroidedPeaksParam() {
    final EnumParameter denseCentroidOption = new EnumParameter(ParamNameEnum.ALLOW_DENSE_CENTROIDED_PEAKS);
    denseCentroidOption.registerEntry("Skip all spectra that fail a peak density check").setDefault();
    denseCentroidOption.registerEntry("Allow mzML/mzXML centroided spectra that fail a peak density check");
    addParameter(denseCentroidOption);
}

private void addDbIndexDirParam(boolean isHidden) {
FileParameter dbIndexDirParam = new FileParameter(ParamNameEnum.DD_DIRECTORY);
dbIndexDirParam.fileMustExist();
Expand Down Expand Up @@ -780,6 +790,8 @@ public void addMSGFPlusParams() {
addChargeCarrierMassParam();
addMaxMissedCleavagesParam();
addMaxNumModsParam();

addAllowDenseCentroidedPeaksParam();

addExample("Example (high-precision): java -Xmx3500M -jar MSGFPlus.jar -s test.mzML -d IPI_human_3.79.fasta -inst 1 -t 20ppm -ti -1,2 -ntt 2 -tda 1 -o testMSGFPlus.mzid -mod Mods.txt");
addExample("Example (low-precision): java -Xmx3500M -jar MSGFPlus.jar -s test.mzML -d IPI_human_3.79.fasta -inst 0 -t 0.5Da,2.5Da -ntt 2 -tda 1 -o testMSGFPlus.mzid -mod Mods.txt");
Expand Down Expand Up @@ -907,6 +919,8 @@ public void addMSGFDBParams() {
uniformAAProb.registerEntry("Use amino acid probabilities computed from the input database").setDefault();
uniformAAProb.registerEntry("Use probability 0.05 for all amino acids");
addParameter(uniformAAProb);

addAllowDenseCentroidedPeaksParam();

addExample("Example (high-precision): java -Xmx2000M -jar MSGFDB.jar -s test.mzXML -d IPI_human_3.79.fasta -t 30ppm -c13 1 -nnet 0 -tda 1 -o testMSGFDB.tsv");
addExample("Example (low-precision): java -Xmx2000M -jar MSGFDB.jar -s test.mzXML -d IPI_human_3.79.fasta -t 0.5Da,2.5Da -nnet 0 -tda 1 -o testMSGFDB.tsv");
Expand Down Expand Up @@ -1175,6 +1189,11 @@ public FileParameter getConfigFileParam() {
return ((FileParameter) getParameter(ParamNameEnum.CONFIGURATION_FILE.key));
}

/**
 * Looks up the integer value of the AllowDenseCentroidedPeaks parameter.
 * Used by MS-GF+.
 *
 * @return the value returned by getIntValue for this parameter's key
 */
public int getAllowDenseCentroidedPeaks() {
    final String paramKey = ParamNameEnum.ALLOW_DENSE_CENTROIDED_PEAKS.key;
    return getIntValue(paramKey);
}

public int getIntValue(String key) {
Parameter param = this.getParameter(key);
if (param instanceof IntParameter)
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/edu/ucsd/msjava/ui/MSGFDB.java
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ private static String runMSGFDB(File specFile, SpecFileFormat specFormat, File o
boolean useUniformAAProb = paramManager.getIntValue(ParamManager.ParamNameEnum.UNIFORM_AA_PROBABILITY.getKey()) == 1;
boolean replicateMergedResults = paramManager.getIntValue("replicate") == 1;
boolean doNotDseEdgeScore = paramManager.getIntValue(ParamManager.ParamNameEnum.EDGE_SCORE.getKey()) == 1;
boolean allowDenseCentroidedPeaks = paramManager.getIntValue(ParamManager.ParamNameEnum.ALLOW_DENSE_CENTROIDED_PEAKS.getKey()) == 1;

System.out.println("Loading database files...");
File dbIndexDir = paramManager.getFile(ParamManager.ParamNameEnum.DD_DIRECTORY.getKey());
Expand Down Expand Up @@ -277,7 +278,7 @@ private static String runMSGFDB(File specFile, SpecFileFormat specFormat, File o
int avgPeptideMass = 2000;
int numBytesPerMass = 12;
int numSpecScannedTogether = (int) ((float) maxMemory / avgPeptideMass / numBytesPerMass);
ArrayList<SpecKey> specKeyList = SpecKey.getSpecKeyList(specAcc.getSpecItr(), startSpecIndex, endSpecIndex, minCharge, maxCharge, activationMethod, Constants.MIN_NUM_PEAKS_PER_SPECTRUM);
ArrayList<SpecKey> specKeyList = SpecKey.getSpecKeyList(specAcc.getSpecItr(), startSpecIndex, endSpecIndex, minCharge, maxCharge, activationMethod, Constants.MIN_NUM_PEAKS_PER_SPECTRUM, allowDenseCentroidedPeaks);
int specSize = specKeyList.size();

System.out.print("Reading spectra finished ");
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/edu/ucsd/msjava/ui/MSGFDBLib.java
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ public static String runMSGFLib(ParamManager paramManager) {
int avgPeptideMass = 2000;
int numBytesPerMass = 12;
int numSpecScannedTogether = (int) ((float) maxMemory / avgPeptideMass / numBytesPerMass);
ArrayList<SpecKey> specKeyList = SpecKey.getSpecKeyList(specAcc.getSpecItr(), 0, Integer.MAX_VALUE, 0, Integer.MAX_VALUE, activationMethod, Constants.MIN_NUM_PEAKS_PER_SPECTRUM);
ArrayList<SpecKey> specKeyList = SpecKey.getSpecKeyList(specAcc.getSpecItr(), 0, Integer.MAX_VALUE, 0, Integer.MAX_VALUE, activationMethod, Constants.MIN_NUM_PEAKS_PER_SPECTRUM, false);
int specSize = specKeyList.size();

System.out.print("Reading spectra finished ");
Expand Down
7 changes: 4 additions & 3 deletions src/main/java/edu/ucsd/msjava/ui/MSGFPlus.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@


public class MSGFPlus {
public static final String VERSION = "Release (v2022.04.18)";
public static final String RELEASE_DATE = "18 April 2022";
public static final String VERSION = "Release (v2023.01.12)";
public static final String RELEASE_DATE = "12 January 2023";

public static final String DECOY_DB_EXTENSION = ".revCat.fasta";
public static final String DEFAULT_DECOY_PROTEIN_PREFIX = "XXX";
Expand Down Expand Up @@ -185,6 +185,7 @@ private static String runMSGFPlus(int ioIndex, SpecFileFormat specFormat, File o

int numThreads = params.getNumThreads();
boolean doNotUseEdgeScore = params.doNotUseEdgeScore();
boolean allowDenseCentroidedPeaks = params.getAllowDenseCentroidedPeaks();

int minNumPeaksPerSpectrum = params.getMinNumPeaksPerSpectrum();
if (minNumPeaksPerSpectrum == -1) // not specified
Expand Down Expand Up @@ -267,7 +268,7 @@ private static String runMSGFPlus(int ioIndex, SpecFileFormat specFormat, File o
return "Error while parsing spectrum file: " + specFile.getPath();

ArrayList<SpecKey> specKeyList = SpecKey.getSpecKeyList(specAcc,
startSpecIndex, endSpecIndex, minCharge, maxCharge, activationMethod, minNumPeaksPerSpectrum);
startSpecIndex, endSpecIndex, minCharge, maxCharge, activationMethod, minNumPeaksPerSpectrum, allowDenseCentroidedPeaks);

int specSize = specKeyList.size();
if (specSize == 0)
Expand Down

0 comments on commit 2ee4ccb

Please sign in to comment.