diff --git a/.vscode/settings.json b/.vscode/settings.json index 1b01f8fd..e6f95556 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -17,7 +17,7 @@ "-p", "test_*.py" ], - "python.testing.pytestEnabled": false, + "python.testing.pytestEnabled": true, "python.testing.unittestEnabled": true, "flake8.args": [ "--max-line-length=120", diff --git a/proteobench/io/params/fragger.py b/proteobench/io/params/fragger.py index 3d80d93f..104a31ff 100644 --- a/proteobench/io/params/fragger.py +++ b/proteobench/io/params/fragger.py @@ -7,14 +7,22 @@ from __future__ import annotations import logging +import re from collections import namedtuple +from pathlib import Path + +import pandas as pd + +from proteobench.io.params import ProteoBenchParameters logger = logging.getLogger(__name__) Parameter = namedtuple("Parameter", ["name", "value", "comment"]) +VERSION_NO_PATTERN = r"\d+(\.\d+)*" + -def read_file(file: str) -> list[Parameter]: +def read_file(file: str, sep: str = " = ") -> list[Parameter]: """Read FragPipe parameter file as list of records.""" with open(file) as f: data = [] @@ -36,7 +44,7 @@ def read_file(file: str) -> list[Parameter]: else: param = line comment = None - res = param.strip().split(" = ") + res = param.strip().split(sep, maxsplit=1) if len(res) == 1: param = res[0].strip() data.append(Parameter(param, None, comment)) @@ -46,13 +54,83 @@ def read_file(file: str) -> list[Parameter]: return data +def extract_params(file: str, f_fragpipe_workflow) -> ProteoBenchParameters: + msfragger_params = read_file(file) + msfragger_params = pd.DataFrame.from_records(msfragger_params, columns=Parameter._fields).set_index( + Parameter._fields[0] + ) + fragpipe_params = read_file(f_fragpipe_workflow, sep="=") + fragpipe_params = pd.DataFrame.from_records(fragpipe_params, columns=Parameter._fields).set_index( + Parameter._fields[0] + ) + + # FragPipe version in first line + with open(f_fragpipe_workflow) as f: + header = next(iter(f))[1:].strip() + + match = re.search(VERSION_NO_PATTERN, header) + + if match: + header = match.group() + + params = ProteoBenchParameters() + params.software_name = "FragPipe" + params.software_version = header + params.search_engine = "MSFragger" + + msfragger_executable = fragpipe_params.loc["fragpipe-config.bin-msfragger", "value"] + msfragger_executable = Path(msfragger_executable).name + match = re.search(VERSION_NO_PATTERN, msfragger_executable) + + if match: + msfragger_executable = match.group() + + params.search_engine_version = msfragger_executable + params.enzyme = msfragger_params.loc["search_enzyme_name_1", "value"] + params.allowed_miscleavages = msfragger_params.loc["allowed_missed_cleavage_1", "value"] + params.fixed_mods = fragpipe_params.loc["msfragger.table.fix-mods", "value"] + params.variable_mods = fragpipe_params.loc["msfragger.table.var-mods", "value"] + params.max_mods = msfragger_params.loc["max_variable_mods_per_peptide", "value"] + params.min_peptide_length = msfragger_params.loc["digest_min_length", "value"] + params.max_peptide_length = msfragger_params.loc["digest_max_length", "value"] + + params.precursor_mass_tolerance = msfragger_params.loc["precursor_true_tolerance", "value"] + params.fragment_mass_tolerance = msfragger_params.loc["fragment_mass_tolerance", "value"] + # ! ionquant is not necessarily fixed? + params.ident_fdr_protein = fragpipe_params.loc["ionquant.proteinfdr", "value"] + params.ident_fdr_peptide = fragpipe_params.loc["ionquant.peptidefdr", "value"] + params.ident_fdr_psm = fragpipe_params.loc["ionquant.ionfdr", "value"] + + for key in ["ident_fdr_protein", "ident_fdr_peptide", "ident_fdr_psm"]: + value = getattr(params, key) + try: + value = int(value) / 100 + setattr(params, key, value) + except ValueError: + logging.warning(f"Could not convert {value} to int.") + + min_precursor_charge, max_precursor_charge = msfragger_params.loc["precursor_charge", "value"].split(" ") + params.min_precursor_charge = int(min_precursor_charge) + params.max_precursor_charge = int(max_precursor_charge) + params.enable_match_between_runs = bool(fragpipe_params.loc["ionquant.mbr", "value"]) + return params + + if __name__ == "__main__": import pathlib - - import pandas as pd + from pprint import pprint file = pathlib.Path("../../../test/params/fragger.params") data = read_file(file) df = pd.DataFrame.from_records(data, columns=Parameter._fields).set_index(Parameter._fields[0]) - df df.to_csv(file.with_suffix(".csv")) + + file_fragpipe = pathlib.Path("../../../test/params/fragpipe.workflow") + data = read_file(file_fragpipe, sep="=") + df = pd.DataFrame.from_records(data, columns=Parameter._fields).set_index(Parameter._fields[0]) + df.to_csv(file_fragpipe.with_suffix(".csv")) + + params = extract_params(file, file_fragpipe) + pprint(params.__dict__) + series = pd.Series(params.__dict__) + series.to_csv(file.parent / "fragger_extracted_params.csv") diff --git a/proteobench/io/params/proline.py b/proteobench/io/params/proline.py index 94528021..9662c0f4 100644 --- a/proteobench/io/params/proline.py +++ b/proteobench/io/params/proline.py @@ -7,7 +7,6 @@ - "Import and filters" - "Quant config" """ -import pathlib import re import pandas as pd @@ -56,8 +55,8 @@ def extract_params(fname) -> ProteoBenchParameters: sheet = sheet[cols].drop_duplicates().reset_index(drop=True) # Extract params.software_name = "Proline" - params.software_version = sheet.loc[0, "software_version"] params.search_engine = sheet.loc[0, "software_name"] + params.search_engine_version = sheet.loc[0, "software_version"] params.enzyme = sheet.loc[0, "enzymes"] params.allowed_miscleavages = sheet.loc[0, "max_missed_cleavages"] params.fixed_mods = sheet.loc[0, "fixed_ptms"] @@ -74,10 +73,11 @@ def extract_params(fname) -> ProteoBenchParameters: assert all(stats.loc["unique", cols] == 1), "Not all columns are unique" sheet = sheet[cols].drop_duplicates().reset_index(drop=True) # Extract - params.ident_fdr_psm = sheet.loc[0, "psm_filter_expected_fdr"] # ! 1 stands for 1% FDR + params.ident_fdr_psm = int(sheet.loc[0, "psm_filter_expected_fdr"]) / 100 params.min_peptide_length = find_min_pep_length(sheet.loc[0, "psm_filter_2"]) # ! Third sheet only contains match between runs (MBR) information indirectly + sheet_name = "Quant config" sheet = excel.parse(sheet_name, dtype="object", index_col=0) enable_match_between_runs = sheet.index.str.contains("cross assignment").any() params.enable_match_between_runs = enable_match_between_runs @@ -85,13 +85,14 @@ def extract_params(fname) -> ProteoBenchParameters: if __name__ == "__main__": - file = pathlib.Path("../../../test/params/Proline_example_w_Mascot_wo_proteinSets.xlsx") + from pathlib import Path + + file = Path("../../../test/params/Proline_example_w_Mascot_wo_proteinSets.xlsx") params = extract_params(file) data_dict = params.__dict__ series = pd.Series(data_dict) series.to_csv(file.with_suffix(".csv")) - - file = pathlib.Path("../../../test/params/Proline_example_2.xlsx") + file = Path("../../../test/params/Proline_example_2.xlsx") params = extract_params(file) data_dict = params.__dict__ series = pd.Series(data_dict) diff --git a/test/params/Proline_example_2.csv b/test/params/Proline_example_2.csv index 100f04c1..b50ad116 100644 --- a/test/params/Proline_example_2.csv +++ b/test/params/Proline_example_2.csv @@ -1,12 +1,12 @@ ,0 software_name,Proline -software_version,X! Tandem Vengeance (2015.12.15.2) +software_version, search_engine,XTandem -search_engine_version, -ident_fdr_psm,1 +search_engine_version,X! Tandem Vengeance (2015.12.15.2) +ident_fdr_psm,0.01 ident_fdr_peptide, ident_fdr_protein, -enable_match_between_runs,False +enable_match_between_runs,True precursor_mass_tolerance,10.0 ppm fragment_mass_tolerance,0.02 Da enzyme,Trypsin diff --git a/test/params/Proline_example_w_Mascot_wo_proteinSets.csv b/test/params/Proline_example_w_Mascot_wo_proteinSets.csv index 04462162..4161f392 100644 --- a/test/params/Proline_example_w_Mascot_wo_proteinSets.csv +++ b/test/params/Proline_example_w_Mascot_wo_proteinSets.csv @@ -1,12 +1,12 @@ ,0 software_name,Proline -software_version,2.8.0.1 +software_version, search_engine,Mascot -search_engine_version, -ident_fdr_psm,1 +search_engine_version,2.8.0.1 +ident_fdr_psm,0.01 ident_fdr_peptide, ident_fdr_protein, -enable_match_between_runs,False +enable_match_between_runs,True precursor_mass_tolerance,10.0 ppm fragment_mass_tolerance,0.02 Da enzyme,Trypsin/P diff --git a/test/params/fragger.csv b/test/params/fragger.csv index f325babb..b4f06547 100644 --- a/test/params/fragger.csv +++ b/test/params/fragger.csv @@ -1,10 +1,10 @@ name,value,comment -database_name,Q:\MISC_PERSONAL\Bart\Benchmark_experiment_EuBIC\Shared\2023-01-30-decoys-BenchmarkFASTAModule1_DDA.fasta.fas,Path to the protein database file in FASTA format. -num_threads,47,Number of CPU threads to use. -precursor_mass_lower,-10,Lower bound of the precursor mass window. -precursor_mass_upper,10,Upper bound of the precursor mass window. +database_name,/mnt/PUMA/ProjectSQ/ASchmidt/KlemensErwinFrohlich_568/20230131-143622_Analysis/ProteoBench/proteobench/Module_2_DDA_quantification/FASTA/2023-12-16-decoys-BenchmarkFASTAModule2_DDA.fasta.fas,Path to the protein database file in FASTA format. +num_threads,50,Number of CPU threads to use. +precursor_mass_lower,-20,Lower bound of the precursor mass window. +precursor_mass_upper,20,Upper bound of the precursor mass window. precursor_mass_units,1,"Precursor mass tolerance units (0 for Da, 1 for ppm)." -data_type,0,"Data type (0 for DDA, 1 for DIA, 2 for gas-phase fractionation DIA)." +data_type,0,"Data type (0 for DDA, 1 for DIA, 2 for gas-phase fractionation DIA, 3 for wide-window acquisition DDA)." precursor_true_tolerance,20,True precursor mass tolerance (window is +/- this value). precursor_true_units,1,"True precursor mass tolerance units (0 for Da, 1 for ppm)." fragment_mass_tolerance,20,Fragment mass tolerance (window is +/- this value). @@ -16,13 +16,19 @@ deisotope,1,"Perform deisotoping or not (0=no, 1=yes and assume singleton peaks deneutralloss,1,"Perform deneutrallossing or not (0=no, 1=yes)." isotope_error,0/1/2,Also search for MS/MS events triggered on specified isotopic peaks. mass_offsets,0.0,Creates multiple precursor tolerance windows with specified mass offsets. +mass_offsets_detailed =,,Optional detailed mass offset list. Overrides mass_offsets if use_detailed_offsets = 1. +use_detailed_offsets,0,Whether to use the regular (0) or detailed (1) mass offset list. precursor_mass_mode,selected,One of isolated/selected/corrected. remove_precursor_peak,1,Remove precursor peaks from tandem mass spectra. 0 = not remove; 1 = remove the peak with precursor charge; 2 = remove the peaks with all charge states (only for DDA mode). remove_precursor_range,"-1.500000,1.500000",m/z range in removing precursor peaks. Only for DDA mode. Unit: Th. intensity_transform,0,Transform peaks intensities with sqrt root. 0 = not transform; 1 = transform using sqrt root. activation_types,all,"Filter to only search scans of provided activation type(s). Allowed: All, HCD, CID, ETD, ECD." +group_variable,0,Specify the variable used to decide the PSM group in the group FDR estimation. 0 = no group FDR; 1 = num_enzyme_termini; 2 = PE from protein header. +require_precursor,1,"If required, PSMs with no precursor peaks will be discarded. For DIA data type only. 0 = no, 1 = yes." +reuse_dia_fragment_peaks,0,"Allow the same peak matches to multiple peptides. For DIA data type only. 0 = no, 1 = yes." write_calibrated_mzml,0,"Write calibrated MS2 scan to a mzML file (0 for No, 1 for Yes)." write_uncalibrated_mgf,0,"Write uncalibrated MS2 scan to a MGF file (0 for No, 1 for Yes). Only for .raw and .d formats." +write_mzbin_all,0, mass_diff_to_variable_mod,0,Put mass diff as a variable modification. 0 for no; 1 for yes and remove delta mass; 2 for yes and keep delta mass. localize_delta_mass,0,"Include fragment ions mass-shifted by unknown modifications (recommended for open and mass offset searches) (0 for OFF, 1 for ON)." delta_mass_exclude_ranges,"(-1.5,3.5)",Exclude mass range for shifted ions searching. @@ -51,7 +57,7 @@ variable_mod_02,42.0106 [^ 1, allow_multiple_variable_mods_on_residue,0, max_variable_mods_per_peptide,3,Maximum total number of variable modifications per peptide. max_variable_mods_combinations,5000,Maximum number of modified forms allowed for each peptide (up to 65534). -output_format,tsv_pepXML_pin,"File format of output files (tsv, pin, pepxml, tsv_pin, tsv_pepxml, pepxml_pin, or tsv_pepxml_pin)." +output_format,pepXML_pin,"File format of output files (tsv, pin, pepxml, tsv_pin, tsv_pepxml, pepxml_pin, or tsv_pepxml_pin)." output_report_topN,1,Reports top N PSMs per input spectrum. output_max_expect,50,Suppresses reporting of PSM if top hit has expectation value greater than this threshold. report_alternative_proteins,1,"Report alternative proteins for peptides that are found in multiple proteins (0 for no, 1 for yes)." @@ -64,7 +70,6 @@ max_fragment_charge,2,Maximum charge state for theoretical fragments to match (1 track_zero_topN,0,Track top N unmodified peptide results separately from main results internally for boosting features. zero_bin_accept_expect,0,Ranks a zero-bin hit above all non-zero-bin hit if it has expectation less than this value. zero_bin_mult_expect,1,Multiplies expect value of PSMs in the zero-bin during results ordering (set to less than 1 for boosting). -add_topN_complementary,0,Inserts complementary ions corresponding to the top N most intense fragments in each experimental spectra. check_spectral_files,1,Checking spectral files before searching. minimum_peaks,15,Minimum number of peaks in experimental spectrum for matching. use_topN_peaks,150,Pre-process experimental spectrum to only use top N peaks. diff --git a/test/params/fragger.params b/test/params/fragger.params index 63da05e9..0e21e8f6 100644 --- a/test/params/fragger.params +++ b/test/params/fragger.params @@ -1,10 +1,10 @@ -database_name = Q:\MISC_PERSONAL\Bart\Benchmark_experiment_EuBIC\Shared\2023-01-30-decoys-BenchmarkFASTAModule1_DDA.fasta.fas # Path to the protein database file in FASTA format. -num_threads = 47 # Number of CPU threads to use. +database_name = /mnt/PUMA/ProjectSQ/ASchmidt/KlemensErwinFrohlich_568/20230131-143622_Analysis/ProteoBench/proteobench/Module_2_DDA_quantification/FASTA/2023-12-16-decoys-BenchmarkFASTAModule2_DDA.fasta.fas # Path to the protein database file in FASTA format. +num_threads = 50 # Number of CPU threads to use. -precursor_mass_lower = -10 # Lower bound of the precursor mass window. -precursor_mass_upper = 10 # Upper bound of the precursor mass window. +precursor_mass_lower = -20 # Lower bound of the precursor mass window. +precursor_mass_upper = 20 # Upper bound of the precursor mass window. precursor_mass_units = 1 # Precursor mass tolerance units (0 for Da, 1 for ppm). -data_type = 0 # Data type (0 for DDA, 1 for DIA, 2 for gas-phase fractionation DIA). +data_type = 0 # Data type (0 for DDA, 1 for DIA, 2 for gas-phase fractionation DIA, 3 for wide-window acquisition DDA). precursor_true_tolerance = 20 # True precursor mass tolerance (window is +/- this value). precursor_true_units = 1 # True precursor mass tolerance units (0 for Da, 1 for ppm). fragment_mass_tolerance = 20 # Fragment mass tolerance (window is +/- this value). @@ -17,15 +17,21 @@ deisotope = 1 # Perform deisotoping or not (0=no, 1=yes and assume singleton p deneutralloss = 1 # Perform deneutrallossing or not (0=no, 1=yes). isotope_error = 0/1/2 # Also search for MS/MS events triggered on specified isotopic peaks. mass_offsets = 0.0 # Creates multiple precursor tolerance windows with specified mass offsets. +mass_offsets_detailed = # Optional detailed mass offset list. Overrides mass_offsets if use_detailed_offsets = 1. +use_detailed_offsets = 0 # Whether to use the regular (0) or detailed (1) mass offset list. precursor_mass_mode = selected # One of isolated/selected/corrected. remove_precursor_peak = 1 # Remove precursor peaks from tandem mass spectra. 0 = not remove; 1 = remove the peak with precursor charge; 2 = remove the peaks with all charge states (only for DDA mode). remove_precursor_range = -1.500000,1.500000 # m/z range in removing precursor peaks. Only for DDA mode. Unit: Th. intensity_transform = 0 # Transform peaks intensities with sqrt root. 0 = not transform; 1 = transform using sqrt root. activation_types = all # Filter to only search scans of provided activation type(s). Allowed: All, HCD, CID, ETD, ECD. +group_variable = 0 # Specify the variable used to decide the PSM group in the group FDR estimation. 0 = no group FDR; 1 = num_enzyme_termini; 2 = PE from protein header. +require_precursor = 1 # If required, PSMs with no precursor peaks will be discarded. For DIA data type only. 0 = no, 1 = yes. +reuse_dia_fragment_peaks = 0 # Allow the same peak matches to multiple peptides. For DIA data type only. 0 = no, 1 = yes. write_calibrated_mzml = 0 # Write calibrated MS2 scan to a mzML file (0 for No, 1 for Yes). write_uncalibrated_mgf = 0 # Write uncalibrated MS2 scan to a MGF file (0 for No, 1 for Yes). Only for .raw and .d formats. +write_mzbin_all = 0 mass_diff_to_variable_mod = 0 # Put mass diff as a variable modification. 0 for no; 1 for yes and remove delta mass; 2 for yes and keep delta mass. localize_delta_mass = 0 # Include fragment ions mass-shifted by unknown modifications (recommended for open and mass offset searches) (0 for OFF, 1 for ON). @@ -78,7 +84,7 @@ allow_multiple_variable_mods_on_residue = 0 max_variable_mods_per_peptide = 3 # Maximum total number of variable modifications per peptide. max_variable_mods_combinations = 5000 # Maximum number of modified forms allowed for each peptide (up to 65534). -output_format = tsv_pepXML_pin # File format of output files (tsv, pin, pepxml, tsv_pin, tsv_pepxml, pepxml_pin, or tsv_pepxml_pin). +output_format = pepXML_pin # File format of output files (tsv, pin, pepxml, tsv_pin, tsv_pepxml, pepxml_pin, or tsv_pepxml_pin). output_report_topN = 1 # Reports top N PSMs per input spectrum. output_max_expect = 50 # Suppresses reporting of PSM if top hit has expectation value greater than this threshold. report_alternative_proteins = 1 # Report alternative proteins for peptides that are found in multiple proteins (0 for no, 1 for yes). @@ -94,7 +100,6 @@ max_fragment_charge = 2 # Maximum charge state for theoretical fragments to ma track_zero_topN = 0 # Track top N unmodified peptide results separately from main results internally for boosting features. zero_bin_accept_expect = 0 # Ranks a zero-bin hit above all non-zero-bin hit if it has expectation less than this value. zero_bin_mult_expect = 1 # Multiplies expect value of PSMs in the zero-bin during results ordering (set to less than 1 for boosting). -add_topN_complementary = 0 # Inserts complementary ions corresponding to the top N most intense fragments in each experimental spectra. check_spectral_files = 1 # Checking spectral files before searching. minimum_peaks = 15 # Minimum number of peaks in experimental spectrum for matching. diff --git a/test/params/fragger_extracted_params.csv b/test/params/fragger_extracted_params.csv new file mode 100644 index 00000000..97b606ef --- /dev/null +++ b/test/params/fragger_extracted_params.csv @@ -0,0 +1,20 @@ +,0 +software_name,FragPipe +software_version,21.0 +search_engine,MSFragger +search_engine_version,4.0 +ident_fdr_psm,0.01 +ident_fdr_peptide,0.01 +ident_fdr_protein,0.01 +enable_match_between_runs,True +precursor_mass_tolerance,20 +fragment_mass_tolerance,20 +enzyme,stricttrypsin +allowed_miscleavages,2 +min_peptide_length,7 +max_peptide_length,50 +fixed_mods,"0.0,C-Term Peptide,true,-1; 0.0,N-Term Peptide,true,-1; 0.0,C-Term Protein,true,-1; 0.0,N-Term Protein,true,-1; 0.0,G (glycine),true,-1; 0.0,A (alanine),true,-1; 0.0,S (serine),true,-1; 0.0,P (proline),true,-1; 0.0,V (valine),true,-1; 0.0,T (threonine),true,-1; 57.02146,C (cysteine),true,-1; 0.0,L (leucine),true,-1; 0.0,I (isoleucine),true,-1; 0.0,N (asparagine),true,-1; 0.0,D (aspartic acid),true,-1; 0.0,Q (glutamine),true,-1; 0.0,K (lysine),true,-1; 0.0,E (glutamic acid),true,-1; 0.0,M (methionine),true,-1; 0.0,H (histidine),true,-1; 0.0,F (phenylalanine),true,-1; 0.0,R (arginine),true,-1; 0.0,Y (tyrosine),true,-1; 0.0,W (tryptophan),true,-1; 0.0,B ,true,-1; 0.0,J,true,-1; 0.0,O,true,-1; 0.0,U,true,-1; 0.0,X,true,-1; 0.0,Z,true,-1" +variable_mods,"15.9949,M,true,3; 42.0106,[^,true,1; 79.96633,STY,false,3; -17.0265,nQnC,false,1; -18.0106,nE,false,1; 4.025107,K,false,2; 6.020129,R,false,2; 8.014199,K,false,2; 10.008269,R,false,2; 0.0,site_10,false,1; 0.0,site_11,false,1; 0.0,site_12,false,1; 0.0,site_13,false,1; 0.0,site_14,false,1; 0.0,site_15,false,1; 0.0,site_16,false,1" +max_mods,3 +min_precursor_charge,1 +max_precursor_charge,4 diff --git a/test/params/fragpipe.csv b/test/params/fragpipe.csv new file mode 100644 index 00000000..21a523f9 --- /dev/null +++ b/test/params/fragpipe.csv @@ -0,0 +1,328 @@ +name,value,comment +database.db-path,/mnt/PUMA/ProjectSQ/ASchmidt/KlemensErwinFrohlich_568/20230131-143622_Analysis/ProteoBench/proteobench/Module_2_DDA_quantification/FASTA/2023-12-16-decoys-BenchmarkFASTAModule2_DDA.fasta.fas, +crystalc.run-crystalc,false, +database.decoy-tag,rev_, +diann.fragpipe.cmd-opts,, +diann.generate-msstats,true, +diann.heavy,, +diann.library,, +diann.light,, +diann.medium,, +diann.q-value,0.01, +diann.quantification-strategy,3, +diann.quantification-strategy-2,QuantUMS (high accuracy), +diann.run-dia-nn,false, +diann.run-dia-plex,false, +diann.run-specific-protein-q-value,false, +diann.unrelated-runs,false, +diann.use-predicted-spectra,true, +diaumpire.AdjustFragIntensity,true, +diaumpire.BoostComplementaryIon,false, +diaumpire.CorrThreshold,0, +diaumpire.DeltaApex,0.2, +diaumpire.ExportPrecursorPeak,false, +diaumpire.Q1,true, +diaumpire.Q2,true, +diaumpire.Q3,true, +diaumpire.RFmax,500, +diaumpire.RPmax,25, +diaumpire.RTOverlap,0.3, +diaumpire.SE.EstimateBG,false, +diaumpire.SE.IsoPattern,0.3, +diaumpire.SE.MS1PPM,10, +diaumpire.SE.MS2PPM,20, +diaumpire.SE.MS2SN,1.1, +diaumpire.SE.MassDefectFilter,true, +diaumpire.SE.MassDefectOffset,0.1, +diaumpire.SE.NoMissedScan,1, +diaumpire.SE.SN,1.1, +diaumpire.run-diaumpire,false, +fpop.fpop-tmt,false, +fpop.label_control,, +fpop.label_fpop,, +fpop.region_size,1, +fpop.run-fpop,false, +fpop.subtract-control,false, +fragpipe-config.bin-diann,/mnt/DATA/software/fragpipe/tools/diann/1.8.2_beta_8/linux/diann-1.8.1.8, +fragpipe-config.bin-ionquant,/mnt/DATA/software/fragpipe/tools/IonQuant-1.10.12.jar, +fragpipe-config.bin-msfragger,/mnt/DATA/software/fragpipe/tools/MSFragger-4.0/MSFragger-4.0.jar, +fragpipe-config.bin-philosopher,/mnt/DATA/software/fragpipe/tools/philosopher_v5.1.0_linux_amd64/philosopher, +fragpipe-config.bin-python,/mnt/DATA/software/anaconda/bin/python3, +freequant.mz-tol,10, +freequant.rt-tol,0.4, +freequant.run-freequant,false, +ionquant.excludemods,, +ionquant.heavy,, +ionquant.imtol,0.05, +ionquant.ionfdr,0.01, +ionquant.light,, +ionquant.locprob,0.75, +ionquant.maxlfq,1, +ionquant.mbr,1, +ionquant.mbrimtol,0.05, +ionquant.mbrmincorr,0, +ionquant.mbrrttol,1, +ionquant.mbrtoprun,10, +ionquant.medium,, +ionquant.minfreq,0, +ionquant.minions,2, +ionquant.minisotopes,2, +ionquant.minscans,3, +ionquant.mztol,10, +ionquant.normalization,1, +ionquant.peptidefdr,1, +ionquant.proteinfdr,1, +ionquant.requantify,1, +ionquant.rttol,0.4, +ionquant.run-ionquant,true, +ionquant.tp,0, +ionquant.uniqueness,0, +ionquant.use-labeling,false, +ionquant.use-lfq,true, +ionquant.writeindex,0, +msbooster.predict-rt,true, +msbooster.predict-spectra,true, +msbooster.run-msbooster,true, +msbooster.use-correlated-features,false, +msfragger.Y_type_masses,, +msfragger.activation_types,all, +msfragger.allowed_missed_cleavage_1,2, +msfragger.allowed_missed_cleavage_2,2, +msfragger.calibrate_mass,2, +msfragger.check_spectral_files,true, +msfragger.clip_nTerm_M,true, +msfragger.deisotope,1, +msfragger.delta_mass_exclude_ranges,"(-1.5,3.5)", +msfragger.deneutralloss,1, +msfragger.diagnostic_fragments,, +msfragger.diagnostic_intensity_filter,0, +msfragger.digest_max_length,50, +msfragger.digest_min_length,7, +msfragger.fragment_ion_series,"b,y", +msfragger.fragment_mass_tolerance,20, +msfragger.fragment_mass_units,1, +msfragger.group_variable,0, +msfragger.intensity_transform,0, +msfragger.ion_series_definitions,, +msfragger.isotope_error,0/1/2, +msfragger.labile_search_mode,off, +msfragger.localize_delta_mass,false, +msfragger.mass_diff_to_variable_mod,0, +msfragger.mass_offsets,0, +msfragger.mass_offsets_detailed,, +msfragger.max_fragment_charge,2, +msfragger.max_variable_mods_combinations,5000, +msfragger.max_variable_mods_per_peptide,3, +msfragger.min_fragments_modelling,2, +msfragger.min_matched_fragments,4, +msfragger.min_sequence_matches,2, +msfragger.minimum_peaks,15, +msfragger.minimum_ratio,0.01, +msfragger.misc.fragger.clear-mz-hi,0, +msfragger.misc.fragger.clear-mz-lo,0, +msfragger.misc.fragger.digest-mass-hi,5000, +msfragger.misc.fragger.digest-mass-lo,500, +msfragger.misc.fragger.enzyme-dropdown-1,stricttrypsin, +msfragger.misc.fragger.enzyme-dropdown-2,null, +msfragger.misc.fragger.precursor-charge-hi,4, +msfragger.misc.fragger.precursor-charge-lo,1, +msfragger.misc.fragger.remove-precursor-range-hi,1.5, +msfragger.misc.fragger.remove-precursor-range-lo,-1.5, +msfragger.misc.slice-db,1, +msfragger.num_enzyme_termini,2, +msfragger.output_format,pepXML_pin, +msfragger.output_max_expect,50, +msfragger.output_report_topN,1, +msfragger.output_report_topN_dda_plus,5, +msfragger.output_report_topN_dia1,5, +msfragger.override_charge,false, +msfragger.precursor_mass_lower,-20, +msfragger.precursor_mass_mode,selected, +msfragger.precursor_mass_units,1, +msfragger.precursor_mass_upper,20, +msfragger.precursor_true_tolerance,20, +msfragger.precursor_true_units,1, +msfragger.remainder_fragment_masses,, +msfragger.remove_precursor_peak,1, +msfragger.report_alternative_proteins,true, +msfragger.require_precursor,true, +msfragger.restrict_deltamass_to,all, +msfragger.reuse_dia_fragment_peaks,false, +msfragger.run-msfragger,true, +msfragger.search_enzyme_cut_1,KR, +msfragger.search_enzyme_cut_2,, +msfragger.search_enzyme_name_1,stricttrypsin, +msfragger.search_enzyme_name_2,null, +msfragger.search_enzyme_nocut_1,, +msfragger.search_enzyme_nocut_2,, +msfragger.search_enzyme_sense_1,C, +msfragger.search_enzyme_sense_2,C, +msfragger.table.fix-mods,"0.0,C-Term Peptide,true,-1; 0.0,N-Term Peptide,true,-1; 0.0,C-Term Protein,true,-1; 0.0,N-Term Protein,true,-1; 0.0,G (glycine),true,-1; 0.0,A (alanine),true,-1; 0.0,S (serine),true,-1; 0.0,P (proline),true,-1; 0.0,V (valine),true,-1; 0.0,T (threonine),true,-1; 57.02146,C (cysteine),true,-1; 0.0,L (leucine),true,-1; 0.0,I (isoleucine),true,-1; 0.0,N (asparagine),true,-1; 0.0,D (aspartic acid),true,-1; 0.0,Q (glutamine),true,-1; 0.0,K (lysine),true,-1; 0.0,E (glutamic acid),true,-1; 0.0,M (methionine),true,-1; 0.0,H (histidine),true,-1; 0.0,F (phenylalanine),true,-1; 0.0,R (arginine),true,-1; 0.0,Y (tyrosine),true,-1; 0.0,W (tryptophan),true,-1; 0.0,B ,true,-1; 0.0,J,true,-1; 0.0,O,true,-1; 0.0,U,true,-1; 0.0,X,true,-1; 0.0,Z,true,-1", +msfragger.table.var-mods,"15.9949,M,true,3; 42.0106,[^,true,1; 79.96633,STY,false,3; -17.0265,nQnC,false,1; -18.0106,nE,false,1; 4.025107,K,false,2; 6.020129,R,false,2; 8.014199,K,false,2; 10.008269,R,false,2; 0.0,site_10,false,1; 0.0,site_11,false,1; 0.0,site_12,false,1; 0.0,site_13,false,1; 0.0,site_14,false,1; 0.0,site_15,false,1; 0.0,site_16,false,1", +msfragger.track_zero_topN,0, +msfragger.use_all_mods_in_first_search,false, +msfragger.use_detailed_offsets,false, +msfragger.use_topN_peaks,150, +msfragger.write_calibrated_mzml,false, +msfragger.write_uncalibrated_mgf,false, +msfragger.zero_bin_accept_expect,0, +msfragger.zero_bin_mult_expect,1, +opair.activation1,HCD, +opair.activation2,ETD, +opair.filterOxonium,true, +opair.glyco_db,, +opair.max_glycans,4, +opair.max_isotope_error,2, +opair.min_isotope_error,0, +opair.ms1_tol,20, +opair.ms2_tol,20, +opair.oxonium_filtering_file,, +opair.oxonium_minimum_intensity,0.05, +opair.reverse_scan_order,false, +opair.run-opair,false, +opair.single_scan_type,false, +peptide-prophet.cmd-opts,--decoyprobs --ppm --accmass --nonparam --expectscore, +peptide-prophet.combine-pepxml,false, +peptide-prophet.run-peptide-prophet,false, +percolator.cmd-opts,--only-psms --no-terminate --post-processing-tdc, +percolator.keep-tsv-files,false, +percolator.min-prob,0.5, +percolator.run-percolator,true, +phi-report.dont-use-prot-proph-file,false, +phi-report.filter,--sequential --prot 0.01, +phi-report.pep-level-summary,false, +phi-report.philosoher-msstats,false, +phi-report.print-decoys,false, +phi-report.prot-level-summary,false, +phi-report.remove-contaminants,false, +phi-report.run-report,true, +protein-prophet.cmd-opts,--maxppmdiff 2000000, +protein-prophet.run-protein-prophet,true, +ptmprophet.cmdline,"KEEPOLD STATIC EM\=1 NIONS\=b M\:15.9949,n\:42.0106 MINPROB\=0.5", +ptmprophet.run-ptmprophet,false, +ptmshepherd.adv_params,false, +ptmshepherd.annotation-common,false, +ptmshepherd.annotation-custom,false, +ptmshepherd.annotation-glyco,false, +ptmshepherd.annotation-unimod,true, +ptmshepherd.annotation_file,, +ptmshepherd.annotation_tol,0.01, +ptmshepherd.cap_y_ions,, +ptmshepherd.decoy_type,1, +ptmshepherd.diag_ions,, +ptmshepherd.diagmine_diagMinFoldChange,3.0, +ptmshepherd.diagmine_diagMinSpecDiff,00.2, +ptmshepherd.diagmine_fragMinFoldChange,3.0, +ptmshepherd.diagmine_fragMinPropensity,00.1, +ptmshepherd.diagmine_fragMinSpecDiff,00.1, +ptmshepherd.diagmine_minIonsPerSpec,2, +ptmshepherd.diagmine_minPeps,25, +ptmshepherd.diagmine_pepMinFoldChange,3.0, +ptmshepherd.diagmine_pepMinSpecDiff,00.2, +ptmshepherd.glyco_adducts,, +ptmshepherd.glyco_fdr,1.00, +ptmshepherd.glyco_isotope_max,3, +ptmshepherd.glyco_isotope_min,-1, +ptmshepherd.glyco_ppm_tol,50, +ptmshepherd.glycodatabase,, +ptmshepherd.histo_smoothbins,2, +ptmshepherd.iontype_a,false, +ptmshepherd.iontype_b,true, +ptmshepherd.iontype_c,false, +ptmshepherd.iontype_x,false, +ptmshepherd.iontype_y,true, +ptmshepherd.iontype_z,false, +ptmshepherd.localization_allowed_res,, +ptmshepherd.localization_background,4, +ptmshepherd.max_adducts,0, +ptmshepherd.n_glyco,true, +ptmshepherd.normalization-psms,true, +ptmshepherd.normalization-scans,false, +ptmshepherd.output_extended,false, +ptmshepherd.peakpicking_mass_units,0, +ptmshepherd.peakpicking_minPsm,10, +ptmshepherd.peakpicking_promRatio,0.3, +ptmshepherd.peakpicking_width,0.002, +ptmshepherd.precursor_mass_units,0, +ptmshepherd.precursor_tol,0.01, +ptmshepherd.print_decoys,false, +ptmshepherd.prob_dhexOx,"2,0.5,0.1", +ptmshepherd.prob_dhexY,"2,0.5", +ptmshepherd.prob_neuacOx,"2,0.05,0.2", +ptmshepherd.prob_neugcOx,"2,0.05,0.2", +ptmshepherd.prob_phosphoOx,"2,0.05,0.2", +ptmshepherd.prob_regY,"5,0.5", +ptmshepherd.prob_sulfoOx,"2,0.05,0.2", +ptmshepherd.remainder_masses,, +ptmshepherd.remove_glycan_delta_mass,true, +ptmshepherd.run-shepherd,false, +ptmshepherd.run_diagextract_mode,false, +ptmshepherd.run_diagmine_mode,false, +ptmshepherd.run_glyco_mode,false, +ptmshepherd.spectra_maxfragcharge,2, +ptmshepherd.spectra_ppmtol,20, +ptmshepherd.varmod_masses,, +quantitation.run-label-free-quant,true, +run-psm-validation,true, +run-validation-tab,true, +saintexpress.fragpipe.cmd-opts,, +saintexpress.max-replicates,10, +saintexpress.run-saint-express,false, +saintexpress.virtual-controls,100, +speclibgen.easypqp.extras.max_delta_ppm,15, +speclibgen.easypqp.extras.max_delta_unimod,0.02, +speclibgen.easypqp.extras.rt_lowess_fraction,0, +speclibgen.easypqp.fragment.a,false, +speclibgen.easypqp.fragment.b,true, +speclibgen.easypqp.fragment.c,false, +speclibgen.easypqp.fragment.x,false, +speclibgen.easypqp.fragment.y,true, +speclibgen.easypqp.fragment.z,false, +speclibgen.easypqp.im-cal,Automatic selection of a run as reference IM, +speclibgen.easypqp.neutral_loss,false, +speclibgen.easypqp.rt-cal,noiRT, +speclibgen.easypqp.select-file.text,, +speclibgen.easypqp.select-im-file.text,, +speclibgen.keep-intermediate-files,false, +speclibgen.run-speclibgen,false, +tab-run.delete_calibrated_mzml,false, +tab-run.delete_temp_files,false, +tab-run.sub_mzml_prob_threshold,0.5, +tab-run.write_sub_mzml,false, +tmtintegrator.add_Ref,-1, +tmtintegrator.aggregation_method,0, +tmtintegrator.allow_overlabel,true, +tmtintegrator.allow_unlabeled,true, +tmtintegrator.best_psm,true, +tmtintegrator.channel_num,TMT-6, +tmtintegrator.extraction_tool,IonQuant, +tmtintegrator.glyco_qval,-1, +tmtintegrator.groupby,0, +tmtintegrator.log2transformed,true, +tmtintegrator.max_pep_prob_thres,0, +tmtintegrator.min_ntt,0, +tmtintegrator.min_pep_prob,0.9, +tmtintegrator.min_percent,0.05, +tmtintegrator.min_purity,0.5, +tmtintegrator.min_site_prob,-1, +tmtintegrator.mod_tag,none, +tmtintegrator.ms1_int,true, +tmtintegrator.outlier_removal,true, +tmtintegrator.print_RefInt,false, +tmtintegrator.prot_exclude,none, +tmtintegrator.prot_norm,0, +tmtintegrator.psm_norm,false, +tmtintegrator.quant_level,2, +tmtintegrator.ref_tag,Bridge, +tmtintegrator.run-tmtintegrator,false, +tmtintegrator.tolerance,20, +tmtintegrator.top3_pep,true, +tmtintegrator.unique_gene,0, +tmtintegrator.unique_pep,false, +tmtintegrator.use_glycan_composition,false, +workdir,/home/pcfuser/ProjectSQ/ASchmidt/KlemensErwinFrohlich_568/20230131-143622_Analysis/ProteoBench/own_analyses/DDA_FragPipe_LFQ_MBR_Opt_FileName, +workflow.input.data-type.im-ms,false, +workflow.input.data-type.regular-ms,true, +workflow.misc.save-sdrf,true, +workflow.ram,0, +workflow.threads,50, diff --git a/test/params/fragpipe.workflow b/test/params/fragpipe.workflow new file mode 100644 index 00000000..482f35f6 --- /dev/null +++ b/test/params/fragpipe.workflow @@ -0,0 +1,333 @@ +# FragPipe (21.0) runtime properties + + +# Please edit the following path to point to the correct location. +# In Windows, please replace single '\' with '\\' +database.db-path=/mnt/PUMA/ProjectSQ/ASchmidt/KlemensErwinFrohlich_568/20230131-143622_Analysis/ProteoBench/proteobench/Module_2_DDA_quantification/FASTA/2023-12-16-decoys-BenchmarkFASTAModule2_DDA.fasta.fas + +crystalc.run-crystalc=false +database.decoy-tag=rev_ +diann.fragpipe.cmd-opts= +diann.generate-msstats=true +diann.heavy= +diann.library= +diann.light= +diann.medium= +diann.q-value=0.01 +diann.quantification-strategy=3 +diann.quantification-strategy-2=QuantUMS (high accuracy) +diann.run-dia-nn=false +diann.run-dia-plex=false +diann.run-specific-protein-q-value=false +diann.unrelated-runs=false +diann.use-predicted-spectra=true +diaumpire.AdjustFragIntensity=true +diaumpire.BoostComplementaryIon=false +diaumpire.CorrThreshold=0 +diaumpire.DeltaApex=0.2 +diaumpire.ExportPrecursorPeak=false +diaumpire.Q1=true +diaumpire.Q2=true +diaumpire.Q3=true +diaumpire.RFmax=500 +diaumpire.RPmax=25 +diaumpire.RTOverlap=0.3 +diaumpire.SE.EstimateBG=false +diaumpire.SE.IsoPattern=0.3 +diaumpire.SE.MS1PPM=10 +diaumpire.SE.MS2PPM=20 +diaumpire.SE.MS2SN=1.1 +diaumpire.SE.MassDefectFilter=true +diaumpire.SE.MassDefectOffset=0.1 +diaumpire.SE.NoMissedScan=1 +diaumpire.SE.SN=1.1 +diaumpire.run-diaumpire=false +fpop.fpop-tmt=false +fpop.label_control= +fpop.label_fpop= +fpop.region_size=1 +fpop.run-fpop=false +fpop.subtract-control=false +fragpipe-config.bin-diann=/mnt/DATA/software/fragpipe/tools/diann/1.8.2_beta_8/linux/diann-1.8.1.8 +fragpipe-config.bin-ionquant=/mnt/DATA/software/fragpipe/tools/IonQuant-1.10.12.jar +fragpipe-config.bin-msfragger=/mnt/DATA/software/fragpipe/tools/MSFragger-4.0/MSFragger-4.0.jar +fragpipe-config.bin-philosopher=/mnt/DATA/software/fragpipe/tools/philosopher_v5.1.0_linux_amd64/philosopher +fragpipe-config.bin-python=/mnt/DATA/software/anaconda/bin/python3 +freequant.mz-tol=10 +freequant.rt-tol=0.4 +freequant.run-freequant=false +ionquant.excludemods= +ionquant.heavy= +ionquant.imtol=0.05 +ionquant.ionfdr=0.01 +ionquant.light= +ionquant.locprob=0.75 +ionquant.maxlfq=1 +ionquant.mbr=1 +ionquant.mbrimtol=0.05 +ionquant.mbrmincorr=0 +ionquant.mbrrttol=1 +ionquant.mbrtoprun=10 +ionquant.medium= +ionquant.minfreq=0 +ionquant.minions=2 +ionquant.minisotopes=2 +ionquant.minscans=3 +ionquant.mztol=10 +ionquant.normalization=1 +ionquant.peptidefdr=1 +ionquant.proteinfdr=1 +ionquant.requantify=1 +ionquant.rttol=0.4 +ionquant.run-ionquant=true +ionquant.tp=0 +ionquant.uniqueness=0 +ionquant.use-labeling=false +ionquant.use-lfq=true +ionquant.writeindex=0 +msbooster.predict-rt=true +msbooster.predict-spectra=true +msbooster.run-msbooster=true +msbooster.use-correlated-features=false +msfragger.Y_type_masses= +msfragger.activation_types=all +msfragger.allowed_missed_cleavage_1=2 +msfragger.allowed_missed_cleavage_2=2 +msfragger.calibrate_mass=2 +msfragger.check_spectral_files=true +msfragger.clip_nTerm_M=true +msfragger.deisotope=1 +msfragger.delta_mass_exclude_ranges=(-1.5,3.5) +msfragger.deneutralloss=1 +msfragger.diagnostic_fragments= +msfragger.diagnostic_intensity_filter=0 +msfragger.digest_max_length=50 +msfragger.digest_min_length=7 +msfragger.fragment_ion_series=b,y +msfragger.fragment_mass_tolerance=20 +msfragger.fragment_mass_units=1 +msfragger.group_variable=0 +msfragger.intensity_transform=0 +msfragger.ion_series_definitions= +msfragger.isotope_error=0/1/2 +msfragger.labile_search_mode=off +msfragger.localize_delta_mass=false +msfragger.mass_diff_to_variable_mod=0 +msfragger.mass_offsets=0 +msfragger.mass_offsets_detailed= +msfragger.max_fragment_charge=2 +msfragger.max_variable_mods_combinations=5000 +msfragger.max_variable_mods_per_peptide=3 +msfragger.min_fragments_modelling=2 +msfragger.min_matched_fragments=4 +msfragger.min_sequence_matches=2 +msfragger.minimum_peaks=15 +msfragger.minimum_ratio=0.01 +msfragger.misc.fragger.clear-mz-hi=0 +msfragger.misc.fragger.clear-mz-lo=0 +msfragger.misc.fragger.digest-mass-hi=5000 +msfragger.misc.fragger.digest-mass-lo=500 +msfragger.misc.fragger.enzyme-dropdown-1=stricttrypsin +msfragger.misc.fragger.enzyme-dropdown-2=null +msfragger.misc.fragger.precursor-charge-hi=4 +msfragger.misc.fragger.precursor-charge-lo=1 +msfragger.misc.fragger.remove-precursor-range-hi=1.5 +msfragger.misc.fragger.remove-precursor-range-lo=-1.5 +msfragger.misc.slice-db=1 +msfragger.num_enzyme_termini=2 +msfragger.output_format=pepXML_pin +msfragger.output_max_expect=50 +msfragger.output_report_topN=1 +msfragger.output_report_topN_dda_plus=5 +msfragger.output_report_topN_dia1=5 +msfragger.override_charge=false +msfragger.precursor_mass_lower=-20 +msfragger.precursor_mass_mode=selected +msfragger.precursor_mass_units=1 +msfragger.precursor_mass_upper=20 +msfragger.precursor_true_tolerance=20 +msfragger.precursor_true_units=1 +msfragger.remainder_fragment_masses= +msfragger.remove_precursor_peak=1 +msfragger.report_alternative_proteins=true +msfragger.require_precursor=true +msfragger.restrict_deltamass_to=all +msfragger.reuse_dia_fragment_peaks=false +msfragger.run-msfragger=true +msfragger.search_enzyme_cut_1=KR +msfragger.search_enzyme_cut_2= +msfragger.search_enzyme_name_1=stricttrypsin +msfragger.search_enzyme_name_2=null +msfragger.search_enzyme_nocut_1= +msfragger.search_enzyme_nocut_2= +msfragger.search_enzyme_sense_1=C +msfragger.search_enzyme_sense_2=C +msfragger.table.fix-mods=0.0,C-Term Peptide,true,-1; 0.0,N-Term Peptide,true,-1; 0.0,C-Term Protein,true,-1; 0.0,N-Term Protein,true,-1; 0.0,G (glycine),true,-1; 0.0,A (alanine),true,-1; 0.0,S (serine),true,-1; 0.0,P (proline),true,-1; 0.0,V (valine),true,-1; 0.0,T (threonine),true,-1; 57.02146,C (cysteine),true,-1; 0.0,L (leucine),true,-1; 0.0,I (isoleucine),true,-1; 0.0,N (asparagine),true,-1; 0.0,D (aspartic acid),true,-1; 0.0,Q (glutamine),true,-1; 0.0,K (lysine),true,-1; 0.0,E (glutamic acid),true,-1; 0.0,M (methionine),true,-1; 0.0,H (histidine),true,-1; 0.0,F (phenylalanine),true,-1; 0.0,R (arginine),true,-1; 0.0,Y (tyrosine),true,-1; 0.0,W (tryptophan),true,-1; 0.0,B ,true,-1; 0.0,J,true,-1; 0.0,O,true,-1; 0.0,U,true,-1; 0.0,X,true,-1; 0.0,Z,true,-1 +msfragger.table.var-mods=15.9949,M,true,3; 42.0106,[^,true,1; 79.96633,STY,false,3; -17.0265,nQnC,false,1; -18.0106,nE,false,1; 4.025107,K,false,2; 6.020129,R,false,2; 8.014199,K,false,2; 10.008269,R,false,2; 0.0,site_10,false,1; 0.0,site_11,false,1; 0.0,site_12,false,1; 0.0,site_13,false,1; 0.0,site_14,false,1; 0.0,site_15,false,1; 0.0,site_16,false,1 +msfragger.track_zero_topN=0 +msfragger.use_all_mods_in_first_search=false +msfragger.use_detailed_offsets=false +msfragger.use_topN_peaks=150 +msfragger.write_calibrated_mzml=false +msfragger.write_uncalibrated_mgf=false +msfragger.zero_bin_accept_expect=0 +msfragger.zero_bin_mult_expect=1 +opair.activation1=HCD +opair.activation2=ETD +opair.filterOxonium=true +opair.glyco_db= +opair.max_glycans=4 +opair.max_isotope_error=2 +opair.min_isotope_error=0 +opair.ms1_tol=20 +opair.ms2_tol=20 +opair.oxonium_filtering_file= +opair.oxonium_minimum_intensity=0.05 +opair.reverse_scan_order=false +opair.run-opair=false +opair.single_scan_type=false +peptide-prophet.cmd-opts=--decoyprobs --ppm --accmass --nonparam --expectscore +peptide-prophet.combine-pepxml=false +peptide-prophet.run-peptide-prophet=false +percolator.cmd-opts=--only-psms --no-terminate --post-processing-tdc +percolator.keep-tsv-files=false +percolator.min-prob=0.5 +percolator.run-percolator=true +phi-report.dont-use-prot-proph-file=false +phi-report.filter=--sequential --prot 0.01 +phi-report.pep-level-summary=false +phi-report.philosoher-msstats=false +phi-report.print-decoys=false +phi-report.prot-level-summary=false +phi-report.remove-contaminants=false +phi-report.run-report=true +protein-prophet.cmd-opts=--maxppmdiff 2000000 +protein-prophet.run-protein-prophet=true +ptmprophet.cmdline=KEEPOLD STATIC EM\=1 NIONS\=b M\:15.9949,n\:42.0106 MINPROB\=0.5 +ptmprophet.run-ptmprophet=false +ptmshepherd.adv_params=false +ptmshepherd.annotation-common=false +ptmshepherd.annotation-custom=false +ptmshepherd.annotation-glyco=false +ptmshepherd.annotation-unimod=true +ptmshepherd.annotation_file= +ptmshepherd.annotation_tol=0.01 +ptmshepherd.cap_y_ions= +ptmshepherd.decoy_type=1 +ptmshepherd.diag_ions= +ptmshepherd.diagmine_diagMinFoldChange=3.0 +ptmshepherd.diagmine_diagMinSpecDiff=00.2 +ptmshepherd.diagmine_fragMinFoldChange=3.0 +ptmshepherd.diagmine_fragMinPropensity=00.1 +ptmshepherd.diagmine_fragMinSpecDiff=00.1 +ptmshepherd.diagmine_minIonsPerSpec=2 +ptmshepherd.diagmine_minPeps=25 +ptmshepherd.diagmine_pepMinFoldChange=3.0 +ptmshepherd.diagmine_pepMinSpecDiff=00.2 +ptmshepherd.glyco_adducts= +ptmshepherd.glyco_fdr=1.00 +ptmshepherd.glyco_isotope_max=3 +ptmshepherd.glyco_isotope_min=-1 +ptmshepherd.glyco_ppm_tol=50 +ptmshepherd.glycodatabase= +ptmshepherd.histo_smoothbins=2 +ptmshepherd.iontype_a=false +ptmshepherd.iontype_b=true +ptmshepherd.iontype_c=false +ptmshepherd.iontype_x=false +ptmshepherd.iontype_y=true +ptmshepherd.iontype_z=false +ptmshepherd.localization_allowed_res= +ptmshepherd.localization_background=4 +ptmshepherd.max_adducts=0 +ptmshepherd.n_glyco=true +ptmshepherd.normalization-psms=true +ptmshepherd.normalization-scans=false +ptmshepherd.output_extended=false +ptmshepherd.peakpicking_mass_units=0 +ptmshepherd.peakpicking_minPsm=10 +ptmshepherd.peakpicking_promRatio=0.3 +ptmshepherd.peakpicking_width=0.002 +ptmshepherd.precursor_mass_units=0 +ptmshepherd.precursor_tol=0.01 +ptmshepherd.print_decoys=false +ptmshepherd.prob_dhexOx=2,0.5,0.1 +ptmshepherd.prob_dhexY=2,0.5 +ptmshepherd.prob_neuacOx=2,0.05,0.2 +ptmshepherd.prob_neugcOx=2,0.05,0.2 +ptmshepherd.prob_phosphoOx=2,0.05,0.2 +ptmshepherd.prob_regY=5,0.5 +ptmshepherd.prob_sulfoOx=2,0.05,0.2 +ptmshepherd.remainder_masses= +ptmshepherd.remove_glycan_delta_mass=true +ptmshepherd.run-shepherd=false +ptmshepherd.run_diagextract_mode=false +ptmshepherd.run_diagmine_mode=false +ptmshepherd.run_glyco_mode=false +ptmshepherd.spectra_maxfragcharge=2 +ptmshepherd.spectra_ppmtol=20 +ptmshepherd.varmod_masses= +quantitation.run-label-free-quant=true +run-psm-validation=true +run-validation-tab=true +saintexpress.fragpipe.cmd-opts= +saintexpress.max-replicates=10 +saintexpress.run-saint-express=false +saintexpress.virtual-controls=100 +speclibgen.easypqp.extras.max_delta_ppm=15 +speclibgen.easypqp.extras.max_delta_unimod=0.02 +speclibgen.easypqp.extras.rt_lowess_fraction=0 +speclibgen.easypqp.fragment.a=false +speclibgen.easypqp.fragment.b=true +speclibgen.easypqp.fragment.c=false +speclibgen.easypqp.fragment.x=false +speclibgen.easypqp.fragment.y=true +speclibgen.easypqp.fragment.z=false +speclibgen.easypqp.im-cal=Automatic selection of a run as reference IM +speclibgen.easypqp.neutral_loss=false +speclibgen.easypqp.rt-cal=noiRT +speclibgen.easypqp.select-file.text= +speclibgen.easypqp.select-im-file.text= +speclibgen.keep-intermediate-files=false +speclibgen.run-speclibgen=false +tab-run.delete_calibrated_mzml=false +tab-run.delete_temp_files=false +tab-run.sub_mzml_prob_threshold=0.5 +tab-run.write_sub_mzml=false +tmtintegrator.add_Ref=-1 +tmtintegrator.aggregation_method=0 +tmtintegrator.allow_overlabel=true +tmtintegrator.allow_unlabeled=true +tmtintegrator.best_psm=true +tmtintegrator.channel_num=TMT-6 +tmtintegrator.extraction_tool=IonQuant +tmtintegrator.glyco_qval=-1 +tmtintegrator.groupby=0 +tmtintegrator.log2transformed=true +tmtintegrator.max_pep_prob_thres=0 +tmtintegrator.min_ntt=0 +tmtintegrator.min_pep_prob=0.9 +tmtintegrator.min_percent=0.05 +tmtintegrator.min_purity=0.5 +tmtintegrator.min_site_prob=-1 +tmtintegrator.mod_tag=none +tmtintegrator.ms1_int=true +tmtintegrator.outlier_removal=true +tmtintegrator.print_RefInt=false +tmtintegrator.prot_exclude=none +tmtintegrator.prot_norm=0 +tmtintegrator.psm_norm=false +tmtintegrator.quant_level=2 +tmtintegrator.ref_tag=Bridge +tmtintegrator.run-tmtintegrator=false +tmtintegrator.tolerance=20 +tmtintegrator.top3_pep=true +tmtintegrator.unique_gene=0 +tmtintegrator.unique_pep=false +tmtintegrator.use_glycan_composition=false +workdir=/home/pcfuser/ProjectSQ/ASchmidt/KlemensErwinFrohlich_568/20230131-143622_Analysis/ProteoBench/own_analyses/DDA_FragPipe_LFQ_MBR_Opt_FileName +workflow.input.data-type.im-ms=false +workflow.input.data-type.regular-ms=true +workflow.misc.save-sdrf=true +workflow.ram=0 +workflow.threads=50 diff --git a/test/test_parse_params_alphapept.py b/test/test_parse_params_alphapept.py index 0bc3c8a9..17cb6e75 100644 --- a/test/test_parse_params_alphapept.py +++ b/test/test_parse_params_alphapept.py @@ -1,5 +1,4 @@ import io -import json from pathlib import Path import pandas as pd diff --git a/test/test_parse_params_fragger.py b/test/test_parse_params_fragger.py index dbc8659c..e708eef4 100644 --- a/test/test_parse_params_fragger.py +++ b/test/test_parse_params_fragger.py @@ -1,13 +1,22 @@ +import io from pathlib import Path import pandas as pd +import pytest import proteobench.io.params.fragger as fragger_params TESTDATA_DIR = Path(__file__).parent / "params" +# ! currently fragpipe with msfragger has two parameter/configuration files per run +fnames = ["fragger.params", "fragpipe.workflow"] +fnames = [TESTDATA_DIR / fname for fname in fnames] -def test_read_file(): +fnames = [(fname, fname.with_suffix(".json")) for fname in fnames] + + +@pytest.mark.parametrize("file,csv_expected", fnames) +def test_read_file(file, csv_expected): file = TESTDATA_DIR / "fragger.params" csv_expected = TESTDATA_DIR / "fragger.csv" expected = pd.read_csv(csv_expected) @@ -16,3 +25,13 @@ def test_read_file(): fragger_params.Parameter._fields[0] ) actual.equals(expected) + + +def test_extract_params(): + file = TESTDATA_DIR / "fragger.params" + f_fragpipe_workflow = TESTDATA_DIR / "fragpipe.workflow" + expected = pd.read_csv(TESTDATA_DIR / "fragger_extracted_params.csv", index_col=0).squeeze("columns") + actual = fragger_params.extract_params(file, f_fragpipe_workflow) + actual = pd.Series(actual.__dict__) + actual = pd.read_csv(io.StringIO(actual.to_csv()), index_col=0).squeeze("columns") + assert expected.equals(actual)