From 3640aefb195fb68b45ca500c9381a2da232804af Mon Sep 17 00:00:00 2001 From: Chris Bielow Date: Thu, 23 Nov 2023 14:12:06 +0100 Subject: [PATCH] Resampler: allow combination of ppm-binning without alignment (used to result in interpreting bins as Da/Th). Rewrite some code to make it more robust and reusable. Added test. --- src/tests/topp/CMakeLists.txt | 4 + src/tests/topp/Resampler_1_output.mzML | 8 +- src/tests/topp/Resampler_2_output.mzML | 189 +++++++++++++++++++++++++ src/topp/Resampler.cpp | 78 +++------- 4 files changed, 217 insertions(+), 62 deletions(-) create mode 100644 src/tests/topp/Resampler_2_output.mzML diff --git a/src/tests/topp/CMakeLists.txt b/src/tests/topp/CMakeLists.txt index a61573e6926..4e293fca1ef 100644 --- a/src/tests/topp/CMakeLists.txt +++ b/src/tests/topp/CMakeLists.txt @@ -1926,6 +1926,10 @@ if(WITH_GUI) add_test("TOPP_Resampler_1" ${TOPP_BIN_PATH}/Resampler -test -in ${DATA_DIR_TOPP}/Resampler_1_input.mzML -out Resampler.mzML -sampling_rate 0.3) add_test("TOPP_Resampler_1_out1" ${DIFF} -whitelist ${INDEX_WHITELIST} -in1 Resampler.mzML -in2 ${DATA_DIR_TOPP}/Resampler_1_output.mzML ) set_tests_properties("TOPP_Resampler_1_out1" PROPERTIES DEPENDS "TOPP_Resampler_1") + + add_test("TOPP_Resampler_2" ${TOPP_BIN_PATH}/Resampler -test -in ${DATA_DIR_TOPP}/Resampler_1_input.mzML -out Resampler2_ppm.mzML -ppm -sampling_rate 2000) + add_test("TOPP_Resampler_2_out1" ${DIFF} -whitelist ${INDEX_WHITELIST} -in1 Resampler2_ppm.mzML -in2 ${DATA_DIR_TOPP}/Resampler_2_output.mzML ) + set_tests_properties("TOPP_Resampler_2_out1" PROPERTIES DEPENDS "TOPP_Resampler_2") endif() #------------------------------------------------------------------------------ diff --git a/src/tests/topp/Resampler_1_output.mzML b/src/tests/topp/Resampler_1_output.mzML index f5e713408ae..566ca0e6b17 100644 --- a/src/tests/topp/Resampler_1_output.mzML +++ b/src/tests/topp/Resampler_1_output.mzML @@ -104,7 +104,7 @@ - 
wP0jQ4WxREN5qcZDyP11Q/3+FEODsBpDAKGaQgAAAAAr/59Ci67+QoD5z0JpAAFECFZfRKBSLUN7pvhCFZz1QZSfCUW8T2xGxtDARn87aEXT1LVFrNURRiqqZ0Upf+tEeEIXRs4Mf0VJJVFEbL30RIHK/kTlFSRDi1JMQ+hTC0RnL75DXVR5Q2zPWkTbWyhEdVD7QvqpgkP71z9EdNMsRANRCkOd/rtDZQUdQwAAAAAAAAAA + wP0jQ4WxREN5qcZDyP11Q/3+FEODsBpDAKGaQgAAAAAr/59Ci67+QoD5z0JpAAFECFZfRKBSLUN7pvhCFZz1QZSfCUW8T2xGxtDARn87aEXT1LVFrNURRiqqZ0Upf+tEeEIXRs4Mf0VJJVFEbb30RIHK/kTlFSRDi1JMQ+hTC0RnL75DXVR5Q2zPWkTbWyhEdVD7QvupgkP71z9EdNMsRANRCkOd/rtDZQUdQwAAAAAAAAAA @@ -129,7 +129,7 @@ - wP0jQ4WxREN5qcZDyP11Q/3+FEODsBpDAKGaQgAAAAAr/59Ci67+QoD5z0JpAAFECFZfRKBSLUN7pvhCFZz1QZSfCUW8T2xGxtDARn87aEXT1LVFrNURRiqqZ0Upf+tEeEIXRs4Mf0VJJVFEbL30RIHK/kTlFSRDi1JMQ+hTC0RnL75DXVR5Q2zPWkTbWyhEdVD7QvqpgkP71z9EdNMsRANRCkOd/rtDZQUdQwAAAAAAAAAA + wP0jQ4WxREN5qcZDyP11Q/3+FEODsBpDAKGaQgAAAAAr/59Ci67+QoD5z0JpAAFECFZfRKBSLUN7pvhCFZz1QZSfCUW8T2xGxtDARn87aEXT1LVFrNURRiqqZ0Upf+tEeEIXRs4Mf0VJJVFEbb30RIHK/kTlFSRDi1JMQ+hTC0RnL75DXVR5Q2zPWkTbWyhEdVD7QvupgkP71z9EdNMsRANRCkOd/rtDZQUdQwAAAAAAAAAA @@ -170,7 +170,7 @@ - wP0jQ4WxREN5qcZDyP11Q/3+FEODsBpDAKGaQgAAAAAr/59Ci67+QoD5z0JpAAFECFZfRKBSLUN7pvhCFZz1QZSfCUW8T2xGxtDARn87aEXT1LVFrNURRiqqZ0Upf+tEeEIXRs4Mf0VJJVFEbL30RIHK/kTlFSRDi1JMQ+hTC0RnL75DXVR5Q2zPWkTbWyhEdVD7QvqpgkP71z9EdNMsRANRCkOd/rtDZQUdQwAAAAAAAAAA + wP0jQ4WxREN5qcZDyP11Q/3+FEODsBpDAKGaQgAAAAAr/59Ci67+QoD5z0JpAAFECFZfRKBSLUN7pvhCFZz1QZSfCUW8T2xGxtDARn87aEXT1LVFrNURRiqqZ0Upf+tEeEIXRs4Mf0VJJVFEbb30RIHK/kTlFSRDi1JMQ+hTC0RnL75DXVR5Q2zPWkTbWyhEdVD7QvupgkP71z9EdNMsRANRCkOd/rtDZQUdQwAAAAAAAAAA @@ -186,4 +186,4 @@ 12669 0 - + \ No newline at end of file diff --git a/src/tests/topp/Resampler_2_output.mzML b/src/tests/topp/Resampler_2_output.mzML new file mode 100644 index 00000000000..1853148d7a9 --- /dev/null +++ b/src/tests/topp/Resampler_2_output.mzML @@ -0,0 +1,189 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
AAAAAAAgjUD0/dR46S6NQIusNZTaPY1A+sUKVtNMjUDWBD/C01uNQBolv9zbao1ALeV5qet5jUA= + + + + + + WOdKRFBrZUSDeA9GT5xMRy+it0Z593VFbFtTRQ== + + + + + + + + + + + + + + + + + + + AAAAAAAgjUD0/dR46S6NQIusNZTaPY1A+sUKVtNMjUDWBD/C01uNQBolv9zbao1ALeV5qet5jUA= + + + + + + WOdKRFBrZUSDeA9GT5xMRy+it0Z593VFbFtTRQ== + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AAAAAAAgjUD0/dR46S6NQIusNZTaPY1A+sUKVtNMjUDWBD/C01uNQBolv9zbao1ALeV5qet5jUA= + + + + + + WOdKRFBrZUSDeA9GT5xMRy+it0Z593VFbFtTRQ== + + + + + + + + + 5378 + 6933 + 8460 + + +10848 +0 + \ No newline at end of file diff --git a/src/topp/Resampler.cpp b/src/topp/Resampler.cpp index 83b382f50ca..d1baf42f98e 100644 --- a/src/topp/Resampler.cpp +++ b/src/topp/Resampler.cpp @@ -11,8 +11,8 @@ #include #include #include -#include #include +#include #include @@ -69,10 +69,10 @@ class TOPPResampler : void registerOptionsAndFlags_() override { registerInputFile_("in", "", "", "input file "); - setValidFormats_("in", ListUtils::create("mzML")); - registerOutputFile_("out", "", "", - "output file in mzML format"); - setValidFormats_("out", ListUtils::create("mzML")); + setValidFormats_("in", {"mzML"}); + + registerOutputFile_("out", "", "", "output file in mzML format"); + setValidFormats_("out", {"mzML"}); registerDoubleOption_("sampling_rate", "", 0.1, "New sampling rate in m/z dimension (in Th unless ppm flag is set)", false); @@ -98,83 +98,45 @@ class TOPPResampler : bool align_sampling = getFlag_("align_sampling"); bool ppm = getFlag_("ppm"); PeakMap exp; FileHandler().loadExperiment(in, exp, {FileTypes::MZML}, log_type_); + exp.updateRanges(); /* compute ranges only after the data is loaded; the aligned branch reads them */ Param resampler_param; resampler_param.setValue("spacing", sampling_rate); - if (ppm) resampler_param.setValue("ppm", "true"); - else resampler_param.setValue("ppm", "false"); + resampler_param.setValue("ppm", ppm ? "true" : "false"); + LinearResamplerAlign lin_resampler; // LinearResampler does not know about ppm! 
+ lin_resampler.setParameters(resampler_param); if (!align_sampling) { - LinearResampler lin_resampler; - lin_resampler.setParameters(resampler_param); - // resample every scan for (Size i = 0; i < exp.size(); ++i) { lin_resampler.raster(exp[i]); } } - else + else if(!exp.RangeMZ::isEmpty()) { - LinearResamplerAlign lin_resampler; - lin_resampler.setParameters(resampler_param); - - bool start_pos_set = false; - bool end_pos_set = false; - double start_pos = 0.0; - double end_pos = 0.0; - // get max / min positions across whole map - for (Size i = 0; i < exp.size(); ++i) - { - if (!exp[i].empty() && (!start_pos_set || exp[i][0].getMZ() < start_pos) ) - { - start_pos = exp[i][0].getMZ(); - start_pos_set = true; - } - if (!exp[i].empty() && (!end_pos_set || exp[i].back().getMZ() > end_pos) ) - { - end_pos = exp[i].back().getMZ(); - end_pos_set = true; - } - } + // start with even m/z position + auto start_pos = floor(exp.getMinMZ()); - if (start_pos_set) + // resample every scan + for (Size i = 0; i < exp.size(); ++i) { - // start with even position - start_pos = std::floor(start_pos); - - // resample every scan - for (Size i = 0; i < exp.size(); ++i) - { - lin_resampler.raster_align(exp[i], start_pos, end_pos); - } + lin_resampler.raster_align(exp[i], start_pos, exp.getMaxMZ()); } } if (min_int_cutoff >= 0.0) { - for (Size i = 0; i < exp.size(); ++i) - { - MSSpectrum tmp = exp[i]; - tmp.clear(false); - for (Size j = 0; j < exp[i].size(); j++) - { - if (exp[i][j].getIntensity() > min_int_cutoff) - { - tmp.push_back(exp[i][j]); - } - } - // swap - exp[i] = tmp; - } + ThresholdMower mow; + Param p; + p.setValue("threshold", min_int_cutoff); + mow.setParameters(p); + mow.filterPeakMap(exp); } - //clear meta data because they are no longer meaningful - exp.clearMetaDataArrays(); - //annotate output with data processing info addDataProcessing_(exp, getProcessingInfo_(DataProcessing::DATA_PROCESSING));