From e88b2315a9866ecf873f2c4689033ce7f3461e15 Mon Sep 17 00:00:00 2001 From: Vincent Musch Date: Thu, 4 Nov 2021 11:41:15 +0100 Subject: [PATCH 01/10] new branch --- src/openswathalgo/source/OPENSWATHALGO/ALGO/MRMScoring.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/openswathalgo/source/OPENSWATHALGO/ALGO/MRMScoring.cpp b/src/openswathalgo/source/OPENSWATHALGO/ALGO/MRMScoring.cpp index 64a6084fdd8..1609b4522d7 100644 --- a/src/openswathalgo/source/OPENSWATHALGO/ALGO/MRMScoring.cpp +++ b/src/openswathalgo/source/OPENSWATHALGO/ALGO/MRMScoring.cpp @@ -808,7 +808,6 @@ namespace OpenSwath for (std::size_t i = 0; i < features.size(); i++) { FeatureType fi = features[i]; - //mi_precursor_combined_matrix_[i].resize(features.size()); intensityi.clear(); fi->getIntensity(intensityi); for (std::size_t j = 0; j < features.size(); j++) From d25139f690f214900fbf0dbc580ba19607c5bdb4 Mon Sep 17 00:00:00 2001 From: Vincent Musch Date: Thu, 4 Nov 2021 17:34:04 +0100 Subject: [PATCH 02/10] new computeRanks, only in MIcombinedMatrix --- .../OpenMS/OPENSWATHALGO/ALGO/Scoring.h | 4 ++- .../source/OPENSWATHALGO/ALGO/MRMScoring.cpp | 8 +++-- .../source/OPENSWATHALGO/ALGO/Scoring.cpp | 33 +++++++++++++++++++ 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/src/openswathalgo/include/OpenMS/OPENSWATHALGO/ALGO/Scoring.h b/src/openswathalgo/include/OpenMS/OPENSWATHALGO/ALGO/Scoring.h index 49610e0d03c..695d8618988 100644 --- a/src/openswathalgo/include/OpenMS/OPENSWATHALGO/ALGO/Scoring.h +++ b/src/openswathalgo/include/OpenMS/OPENSWATHALGO/ALGO/Scoring.h @@ -131,10 +131,12 @@ namespace OpenSwath OPENSWATHALGO_DLLAPI void normalize_sum(double x[], unsigned int n); // Compute rank of vector elements - OPENSWATHALGO_DLLAPI std::vector computeRank(const std::vector& w); + OPENSWATHALGO_DLLAPI std::vector computeRank(const std::vector& v_temp); + OPENSWATHALGO_DLLAPI void computeRank2(const std::vector& v, std::vector& ranks); // Estimate rank-transformed mutual information between two vectors of data points OPENSWATHALGO_DLLAPI double rankedMutualInformation(std::vector& data1, std::vector& data2); + OPENSWATHALGO_DLLAPI double rankedMutualInformation2(std::vector& data1, std::vector& data2); //@} diff --git a/src/openswathalgo/source/OPENSWATHALGO/ALGO/MRMScoring.cpp b/src/openswathalgo/source/OPENSWATHALGO/ALGO/MRMScoring.cpp index 1609b4522d7..f3f1340e2b7 100644 --- a/src/openswathalgo/source/OPENSWATHALGO/ALGO/MRMScoring.cpp +++ b/src/openswathalgo/source/OPENSWATHALGO/ALGO/MRMScoring.cpp @@ -44,6 +44,7 @@ #include // for isnan #include + namespace OpenSwath { @@ -803,22 +804,25 @@ namespace OpenSwath FeatureType fj = mrmfeature->getFeature(native_ids[j]); features.push_back(fj); } - + std::vector rank_vec1, rank_vec2; mi_precursor_combined_matrix_.resize(features.size(), features.size()); for (std::size_t i = 0; i < features.size(); i++) { FeatureType fi = features[i]; intensityi.clear(); fi->getIntensity(intensityi); + Scoring::computeRank2(intensityi, rank_vec1); for (std::size_t j = 0; j < features.size(); j++) { FeatureType fj = features[j]; intensityj.clear(); fj->getIntensity(intensityj); + Scoring::computeRank2(intensityj, rank_vec2); // compute ranked mutual information - mi_precursor_combined_matrix_.setValue(i ,j, Scoring::rankedMutualInformation(intensityi, intensityj)); + mi_precursor_combined_matrix_.setValue(i , j, Scoring::rankedMutualInformation2(rank_vec1, rank_vec2)); } } + } double MRMScoring::calcMIScore() diff --git a/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp b/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp index 4401d51b066..8463c935e96 100644 --- a/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp +++ b/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp @@ -291,6 +291,28 @@ namespace OpenSwath::Scoring } return result; } + void computeRank2(const std::vector& v_temp, std::vector& ranks) + { + std::vector > v_sort(v_temp.size()); + + for (unsigned int i = 0; i < v_sort.size(); ++i) { + v_sort[i] = std::make_pair(v_temp[i], i); + } + + std::sort(v_sort.begin(), v_sort.end()); + + std::pair rank; + + ranks.resize(v_temp.size()); + for (unsigned int i = 0; i < v_sort.size(); ++i) + { + if (v_sort[i].first != rank.first) + { + rank = std::make_pair(v_sort[i].first, i); + } + ranks[v_sort[i].second] = rank.second; + } + } double rankedMutualInformation(std::vector& data1, std::vector& data2) { @@ -305,6 +327,17 @@ namespace OpenSwath::Scoring double result = calcMutualInformation(arr_int_data1, arr_int_data2, int_data1.size()); + return result; + } + double rankedMutualInformation2(std::vector& data1, std::vector& data2) + { + OPENSWATH_PRECONDITION(data1.size() != 0 && data1.size() == data2.size(), "Both data vectors need to have the same length"); + + unsigned int* arr_int_data1 = &data1[0]; + unsigned int* arr_int_data2 = &data2[0]; + + double result = calcMutualInformation(arr_int_data1, arr_int_data2, data1.size()); + return result; } } //namespace OpenMS // namespace Scoring From d3ee09708dc7e190c24e85672f53879d4511a772 Mon Sep 17 00:00:00 2001 From: Vincent Musch Date: Fri, 5 Nov 2021 15:57:44 +0100 Subject: [PATCH 03/10] little fix --- src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp b/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp index 8463c935e96..9e087e5f236 100644 --- a/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp +++ b/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp @@ -231,7 +231,8 @@ namespace OpenSwath::Scoring // sigma_1 * sigma_2 * n denominator = sqrt(sqsum1 * sqsum2); } - + //avoids division in the for loop + denominator = 1/denominator; XCorrArrayType result; result.data.reserve( (size_t)std::ceil((2*maxdelay + 1) / lag)); int cnt = 0; @@ -257,12 +258,12 @@ namespace OpenSwath::Scoring if (denominator > 0) { - result.data.push_back(std::make_pair(delay, sxy/denominator)); + result.data.emplace_back(delay, sxy*denominator); } else { // e.g. if all datapoints are zero - result.data.push_back(std::make_pair(delay, 0)); + result.data.emplace_back(delay, 0); } } return result; From 2aa9ff749bf8a28d6b973053f3c53cb1df146947 Mon Sep 17 00:00:00 2001 From: Vincent Musch Date: Fri, 12 Nov 2021 17:17:26 +0100 Subject: [PATCH 04/10] new computeRank --- .../OPENSWATH/MRMTransitionGroupPicker.h | 6 +++-- .../source/ANALYSIS/OPENSWATH/MRMScoring.cpp | 26 ++++++++++++++----- .../OpenMS/OPENSWATHALGO/ALGO/Scoring.h | 8 +++--- .../source/OPENSWATHALGO/ALGO/Scoring.cpp | 12 +++++---- .../openswathalgo/Scoring_test.cpp | 20 +++++++++----- 5 files changed, 48 insertions(+), 24 deletions(-) diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/MRMTransitionGroupPicker.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/MRMTransitionGroupPicker.h index 5e65fddc076..bbe644d67cb 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/MRMTransitionGroupPicker.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/MRMTransitionGroupPicker.h @@ -445,12 +445,13 @@ namespace OpenMS double transition_total_mi = 0; if (compute_total_mi_) { + std::vector rank_vec1, rank_vec2; std::vector chrom_vect_id, chrom_vect_det; for (typename SpectrumT::const_iterator it = chromatogram.begin(); it != chromatogram.end(); it++) { chrom_vect_id.push_back(it->getIntensity()); } - + OpenSwath::Scoring::computeRank(chrom_vect_id, rank_vec2); // compute baseline mutual information int transition_total_mi_norm = 0; for (Size m = 0; m < transition_group.getTransitions().size(); m++) @@ -463,7 +464,8 @@ namespace OpenMS { chrom_vect_det.push_back(it->getIntensity()); } - transition_total_mi += OpenSwath::Scoring::rankedMutualInformation(chrom_vect_det, chrom_vect_id); + OpenSwath::Scoring::computeRank(chrom_vect_det, rank_vec1); + transition_total_mi += OpenSwath::Scoring::rankedMutualInformation(rank_vec1, rank_vec2); transition_total_mi_norm++; } } diff --git a/src/openms/source/ANALYSIS/OPENSWATH/MRMScoring.cpp b/src/openms/source/ANALYSIS/OPENSWATH/MRMScoring.cpp index 10d4b49a637..672696fa2fc 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/MRMScoring.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/MRMScoring.cpp @@ -709,19 +709,22 @@ namespace OpenSwath { std::vector intensityi, intensityj; mi_matrix_.resize(native_ids.size(),native_ids.size()); + std::vector rank_vec1, rank_vec2; for (std::size_t i = 0; i < native_ids.size(); i++) { FeatureType fi = mrmfeature->getFeature(native_ids[i]); intensityi.clear(); fi->getIntensity(intensityi); + Scoring::computeRank(intensityi, rank_vec1); for (std::size_t j = i; j < native_ids.size(); j++) { FeatureType fj = mrmfeature->getFeature(native_ids[j]); intensityj.clear(); fj->getIntensity(intensityj); + Scoring::computeRank(intensityj, rank_vec2); // compute ranked mutual information - mi_matrix_.setValue(i,j,Scoring::rankedMutualInformation(intensityi, intensityj)); + mi_matrix_.setValue(i,j,Scoring::rankedMutualInformation(rank_vec1, rank_vec2)); } } } @@ -730,19 +733,22 @@ namespace OpenSwath { std::vector intensityi, intensityj; mi_contrast_matrix_.resize(native_ids_set1.size(), native_ids_set2.size()); + std::vector rank_vec1, rank_vec2; for (std::size_t i = 0; i < native_ids_set1.size(); i++) { FeatureType fi = mrmfeature->getFeature(native_ids_set1[i]); //mi_contrast_matrix_[i].resize(native_ids_set2.size()); intensityi.clear(); fi->getIntensity(intensityi); + Scoring::computeRank(intensityi, rank_vec1); for (std::size_t j = 0; j < native_ids_set2.size(); j++) { FeatureType fj = mrmfeature->getFeature(native_ids_set2[j]); intensityj.clear(); fj->getIntensity(intensityj); + Scoring::computeRank(intensityj, rank_vec2); // compute ranked mutual information - mi_contrast_matrix_.setValue(i, j, Scoring::rankedMutualInformation(intensityi, intensityj)); + mi_contrast_matrix_.setValue(i, j, Scoring::rankedMutualInformation(rank_vec1, rank_vec2)); } } } @@ -751,18 +757,21 @@ namespace OpenSwath { std::vector intensityi, intensityj; mi_precursor_matrix_.resize(precursor_ids.size(),precursor_ids.size()); + std::vector rank_vec1, rank_vec2; for (std::size_t i = 0; i < precursor_ids.size(); i++) { FeatureType fi = mrmfeature->getPrecursorFeature(precursor_ids[i]); intensityi.clear(); fi->getIntensity(intensityi); + Scoring::computeRank(intensityi, rank_vec1); for (std::size_t j = i; j < precursor_ids.size(); j++) { FeatureType fj = mrmfeature->getPrecursorFeature(precursor_ids[j]); intensityj.clear(); fj->getIntensity(intensityj); + Scoring::computeRank(intensityj, rank_vec2); // compute ranked mutual information - mi_precursor_matrix_.setValue(i, j, Scoring::rankedMutualInformation(intensityi, intensityj)); + mi_precursor_matrix_.setValue(i, j, Scoring::rankedMutualInformation(rank_vec1, rank_vec2)); } } } @@ -771,19 +780,22 @@ namespace OpenSwath { std::vector intensityi, intensityj; mi_precursor_contrast_matrix_.resize(precursor_ids.size(), native_ids.size()); + std::vector rank_vec1, rank_vec2; for (std::size_t i = 0; i < precursor_ids.size(); i++) { FeatureType fi = mrmfeature->getPrecursorFeature(precursor_ids[i]); //mi_precursor_contrast_matrix_[i].resize(native_ids.size()); intensityi.clear(); fi->getIntensity(intensityi); + Scoring::computeRank(intensityi, rank_vec1); for (std::size_t j = 0; j < native_ids.size(); j++) { FeatureType fj = mrmfeature->getFeature(native_ids[j]); intensityj.clear(); fj->getIntensity(intensityj); + Scoring::computeRank(intensityj, rank_vec2); // compute ranked mutual information - mi_precursor_contrast_matrix_.setValue(i, j, Scoring::rankedMutualInformation(intensityi, intensityj)); + mi_precursor_contrast_matrix_.setValue(i, j, Scoring::rankedMutualInformation(rank_vec1, rank_vec2)); } } } @@ -803,20 +815,22 @@ namespace OpenSwath FeatureType fj = mrmfeature->getFeature(native_ids[j]); features.push_back(fj); } - + std::vector rank_vec1, rank_vec2; mi_precursor_combined_matrix_.resize(features.size(), features.size()); for (std::size_t i = 0; i < features.size(); i++) { FeatureType fi = features[i]; intensityi.clear(); fi->getIntensity(intensityi); + Scoring::computeRank(intensityi, rank_vec1); for (std::size_t j = 0; j < features.size(); j++) { FeatureType fj = features[j]; intensityj.clear(); fj->getIntensity(intensityj); + Scoring::computeRank(intensityj, rank_vec2); // compute ranked mutual information - mi_precursor_combined_matrix_.setValue(i ,j, Scoring::rankedMutualInformation(intensityi, intensityj)); + mi_precursor_combined_matrix_.setValue(i ,j, Scoring::rankedMutualInformation(rank_vec1, rank_vec2)); } } } diff --git a/src/openswathalgo/include/OpenMS/OPENSWATHALGO/ALGO/Scoring.h b/src/openswathalgo/include/OpenMS/OPENSWATHALGO/ALGO/Scoring.h index 695d8618988..c25ef763af2 100644 --- a/src/openswathalgo/include/OpenMS/OPENSWATHALGO/ALGO/Scoring.h +++ b/src/openswathalgo/include/OpenMS/OPENSWATHALGO/ALGO/Scoring.h @@ -131,12 +131,12 @@ namespace OpenSwath OPENSWATHALGO_DLLAPI void normalize_sum(double x[], unsigned int n); // Compute rank of vector elements - OPENSWATHALGO_DLLAPI std::vector computeRank(const std::vector& v_temp); - OPENSWATHALGO_DLLAPI void computeRank2(const std::vector& v, std::vector& ranks); + //OPENSWATHALGO_DLLAPI std::vector computeRank(const std::vector& v_temp); + OPENSWATHALGO_DLLAPI void computeRank(const std::vector& v, std::vector& ranks); // Estimate rank-transformed mutual information between two vectors of data points - OPENSWATHALGO_DLLAPI double rankedMutualInformation(std::vector& data1, std::vector& data2); - OPENSWATHALGO_DLLAPI double rankedMutualInformation2(std::vector& data1, std::vector& data2); + //OPENSWATHALGO_DLLAPI double rankedMutualInformation(std::vector& data1, std::vector& data2); + OPENSWATHALGO_DLLAPI double rankedMutualInformation(std::vector& data1, std::vector& data2); //@} diff --git a/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp b/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp index d04cefc5dfe..4fda1979093 100644 --- a/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp +++ b/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp @@ -269,7 +269,7 @@ namespace OpenSwath::Scoring } return result; } - +/* std::vector computeRank(const std::vector& v_temp) { std::vector > v_sort(v_temp.size()); @@ -293,7 +293,8 @@ namespace OpenSwath::Scoring } return result; } - void computeRank2(const std::vector& v_temp, std::vector& ranks_out) +*/ + void computeRank(const std::vector& v_temp, std::vector& ranks_out) { std::vector ranks{}; ranks.resize(v_temp.size()); @@ -313,7 +314,7 @@ namespace OpenSwath::Scoring ranks_out[ranks[i]] = y; } } - +/* double rankedMutualInformation(std::vector& data1, std::vector& data2) { OPENSWATH_PRECONDITION(data1.size() != 0 && data1.size() == data2.size(), "Both data vectors need to have the same length"); @@ -328,8 +329,9 @@ namespace OpenSwath::Scoring double result = calcMutualInformation(arr_int_data1, arr_int_data2, int_data1.size()); return result; - } - double rankedMutualInformation2(std::vector& data1, std::vector& data2) + }*/ + + double rankedMutualInformation(std::vector& data1, std::vector& data2) { OPENSWATH_PRECONDITION(data1.size() != 0 && data1.size() == data2.size(), "Both data vectors need to have the same length"); diff --git a/src/tests/class_tests/openswathalgo/Scoring_test.cpp b/src/tests/class_tests/openswathalgo/Scoring_test.cpp index f5acb0b5266..37d73bebbaf 100644 --- a/src/tests/class_tests/openswathalgo/Scoring_test.cpp +++ b/src/tests/class_tests/openswathalgo/Scoring_test.cpp @@ -400,8 +400,8 @@ y = [5.97543668746948 4.2749171257019 3.3301842212677 4.08597040176392 5.5030703 std::vector data2 {15.8951349258423, 41.5446395874023, 76.0746307373047, 109.069435119629, 111.90364074707, 169.79216003418, 121.043930053711, 63.0136985778809, 44.6150207519531, 21.4926776885986, 7.93575811386108}; - - auto result = Scoring::computeRank(data1); + std::vector result; + Scoring::computeRank(data1, result); TEST_EQUAL (result[0],7); TEST_EQUAL (result[1],4); @@ -431,7 +431,7 @@ x = [15.8951349258423 41.5446395874023 76.0746307373047 109.069435119629 111.903 m1 = mi(x_ranking,y_ranking) */ - +/* static const double arr1[] = { 5.97543668746948, 4.2749171257019, 3.3301842212677, 4.08597040176392, 5.50307035446167, 5.24326848983765, @@ -442,10 +442,16 @@ m1 = mi(x_ranking,y_ranking) 15.8951349258423, 41.5446395874023, 76.0746307373047, 109.069435119629, 111.90364074707, 169.79216003418, 121.043930053711, 63.0136985778809, 44.6150207519531, 21.4926776885986, 7.93575811386108 }; - std::vector data1 (arr1, arr1 + sizeof(arr1) / sizeof(arr1[0]) ); - std::vector data2 (arr2, arr2 + sizeof(arr2) / sizeof(arr2[0]) ); - - double result = Scoring::rankedMutualInformation(data1, data2); + */ + std::vector data1 = {5.97543668746948, 4.2749171257019, 3.3301842212677, 4.08597040176392, 5.50307035446167, 5.24326848983765, + 8.40812492370605, 2.83419919013977, 6.94378805160522, 7.69957494735718, 4.08597040176392}; + std::vector data2 = {15.8951349258423, 41.5446395874023, 76.0746307373047, 109.069435119629, 111.90364074707, 169.79216003418, + 121.043930053711, 63.0136985778809, 44.6150207519531, 21.4926776885986, 7.93575811386108}; + std::vector rank_vec1, rank_vec2; + Scoring::computeRank(data1, rank_vec1); + Scoring::computeRank(data2, rank_vec2); + + double result = Scoring::rankedMutualInformation(rank_vec1, rank_vec2); TEST_REAL_SIMILAR (result, 3.2776); } From 3e5924721c6a697a84251b6501c133598034f3dc Mon Sep 17 00:00:00 2001 From: Vincent Musch Date: Fri, 12 Nov 2021 17:21:13 +0100 Subject: [PATCH 05/10] delete old function --- .../source/OPENSWATHALGO/ALGO/Scoring.cpp | 40 ------------------- 1 file changed, 40 deletions(-) diff --git a/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp b/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp index 4fda1979093..7e9039620a1 100644 --- a/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp +++ b/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp @@ -269,31 +269,7 @@ namespace OpenSwath::Scoring } return result; } -/* - std::vector computeRank(const std::vector& v_temp) - { - std::vector > v_sort(v_temp.size()); - - for (unsigned int i = 0; i < v_sort.size(); ++i) { - v_sort[i] = std::make_pair(v_temp[i], i); - } - - std::sort(v_sort.begin(), v_sort.end()); - - std::pair rank; - std::vector result(v_temp.size()); - for (unsigned int i = 0; i < v_sort.size(); ++i) - { - if (v_sort[i].first != rank.first) - { - rank = std::make_pair(v_sort[i].first, i); - } - result[v_sort[i].second] = rank.second; - } - return result; - } -*/ void computeRank(const std::vector& v_temp, std::vector& ranks_out) { std::vector ranks{}; @@ -314,22 +290,6 @@ namespace OpenSwath::Scoring ranks_out[ranks[i]] = y; } } -/* - double rankedMutualInformation(std::vector& data1, std::vector& data2) - { - OPENSWATH_PRECONDITION(data1.size() != 0 && data1.size() == data2.size(), "Both data vectors need to have the same length"); - - // rank the data - std::vector int_data1 = computeRank(data1); - std::vector int_data2 = computeRank(data2); - - unsigned int* arr_int_data1 = &int_data1[0]; - unsigned int* arr_int_data2 = &int_data2[0]; - - double result = calcMutualInformation(arr_int_data1, arr_int_data2, int_data1.size()); - - return result; - }*/ double rankedMutualInformation(std::vector& data1, std::vector& data2) { From 5330cc681c59e4b8171511d3f2bf010bf85b295f Mon Sep 17 00:00:00 2001 From: Vincent Musch Date: Tue, 16 Nov 2021 13:12:56 +0100 Subject: [PATCH 06/10] little fixes --- src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp b/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp index 7e9039620a1..e36848a5c7c 100644 --- a/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp +++ b/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp @@ -153,9 +153,10 @@ namespace OpenSwath::Scoring { stdev = 1; // all data is equal } + stdev = 1/stdev; for (std::size_t i = 0; i < data.size(); i++) { - data[i] = (data[i] - mean) / stdev; + data[i] = (data[i] - mean) * stdev; } } @@ -168,9 +169,10 @@ namespace OpenSwath::Scoring standardize_data(data1); standardize_data(data2); XCorrArrayType result = calculateCrossCorrelation(data1, data2, maxdelay, lag); + for (XCorrArrayType::iterator it = result.begin(); it != result.end(); ++it) { - it->second = it->second / data1.size(); + it->second /= data1.size(); } return result; } From 6c90292be4ea0a726ea85048dc3c0e8841d882d8 Mon Sep 17 00:00:00 2001 From: Vincent Musch Date: Thu, 18 Nov 2021 17:13:32 +0100 Subject: [PATCH 07/10] fix --- src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp b/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp index e36848a5c7c..3afe5015c9d 100644 --- a/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp +++ b/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp @@ -32,6 +32,7 @@ // $Authors: Hannes Roest $ // -------------------------------------------------------------------------- +#include #include #include #include @@ -170,9 +171,10 @@ namespace OpenSwath::Scoring standardize_data(data2); XCorrArrayType result = calculateCrossCorrelation(data1, data2, maxdelay, lag); - for (XCorrArrayType::iterator it = result.begin(); it != result.end(); ++it) + double d = 1.0 / data1.size(); + for(auto& e : result) { - it->second /= data1.size(); + e.second *= d; } return result; } From 16902f0dfe4da3af2f801495141709961eefb8b3 Mon Sep 17 00:00:00 2001 From: Vincent Musch Date: Thu, 25 Nov 2021 10:17:03 +0100 Subject: [PATCH 08/10] new MutualInformation fkt --- .../OpenMS/OPENSWATHALGO/ALGO/Scoring.h | 13 +++ .../source/OPENSWATHALGO/ALGO/Scoring.cpp | 93 ++++++++++++++++++- 2 files changed, 102 insertions(+), 4 deletions(-) diff --git a/src/openswathalgo/include/OpenMS/OPENSWATHALGO/ALGO/Scoring.h b/src/openswathalgo/include/OpenMS/OPENSWATHALGO/ALGO/Scoring.h index c25ef763af2..970e7a64740 100644 --- a/src/openswathalgo/include/OpenMS/OPENSWATHALGO/ALGO/Scoring.h +++ b/src/openswathalgo/include/OpenMS/OPENSWATHALGO/ALGO/Scoring.h @@ -70,6 +70,17 @@ namespace OpenSwath iterator end() {return data.end();} const_iterator end() const {return data.end();} }; + + struct jpstate + { + std::vector jointProbabilityVector; + int numJointStates; + std::vector firstProbabilityVector; + int numFirstStates; + std::vector secondProbabilityVector; + int numSecondStates; + std::vector jointPositionVector; + }; //@} /** @name Helper functions */ @@ -138,6 +149,8 @@ namespace OpenSwath //OPENSWATHALGO_DLLAPI double rankedMutualInformation(std::vector& data1, std::vector& data2); OPENSWATHALGO_DLLAPI double rankedMutualInformation(std::vector& data1, std::vector& data2); + OPENSWATHALGO_DLLAPI unsigned int maxElem(const std::vector& arr); + //@} } diff --git a/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp b/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp index 3afe5015c9d..2aa6eb7d96b 100644 --- a/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp +++ b/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp @@ -56,7 +56,7 @@ namespace OpenSwath::Scoring return; } auto inverse_sum = 1 / sumx; // precompute inverse since division is expensive! - for (int i = 0; i < n; ++i) + for (unsigned int i = 0; i < n; ++i) { x[i] *= inverse_sum; } @@ -295,14 +295,99 @@ namespace OpenSwath::Scoring } } + unsigned int maxElem(const std::vector& arr) + { + unsigned int max = arr[0]; + for(auto e : arr) + { + if(e > max) max = e; + } + return max+1; + } + + + + jpstate calcJointProbability(const std::vector& firstVector,const std::vector& secondVector,const int& vectorLength) + { + jpstate state; + double length = 1.0 / vectorLength; + unsigned int firstNumStates = maxElem(firstVector); + unsigned int secondNumStates = maxElem(secondVector); + unsigned int jointNumStates = firstNumStates * secondNumStates; + + std::vector firstStateCounts(firstNumStates, 0); + std::vector secondStateCounts(secondNumStates, 0); + std::vector jointStateCounts(jointNumStates, 0); + std::vector jointPosition(firstNumStates, 0); + + std::vector firstStateProbs(firstNumStates, 0.0); + std::vector secondStateProbs(secondNumStates, 0.0); + std::vector jointStateProbs(jointNumStates, 0.0); + + for(int i = 0; i < vectorLength; i++) + { + firstStateCounts[firstVector[i]] += 1; + secondStateCounts[secondVector[i]] += 1; + jointPosition[i] = secondVector[i] * firstNumStates + firstVector[i]; + jointStateCounts[jointPosition[i]] += 1; + } + + for (unsigned int i = 0; i < firstNumStates; i++) { + firstStateProbs[i] = firstStateCounts[i] * length; + } + + for (unsigned int i = 0; i < secondNumStates; i++) { + secondStateProbs[i] = secondStateCounts[i] * length; + } + + for (unsigned int i = 0; i < jointNumStates; i++) { + jointStateProbs[i] = jointStateCounts[i] * length; + } + + state.jointPositionVector = jointPosition; + state.jointProbabilityVector = jointStateProbs; + state.numJointStates = jointNumStates; + state.firstProbabilityVector = firstStateProbs; + state.numFirstStates = firstNumStates; + state.secondProbabilityVector = secondStateProbs; + state.numSecondStates = secondNumStates; + + return state; + } + + double mutualInformation(jpstate& state,const std::vector& firstVector,const std::vector& secondVector) + { + double mutualInformation = 0.0; + //int firstIndex,secondIndex; + + /* + ** I(X;Y) = \sum_x \sum_y p(x,y) * \log (p(x,y)/p(x)p(y)) + */ + for (unsigned int i = 0; i < firstVector.size(); i++) + { + + int j = state.jointPositionVector[i]; + if(state.jointProbabilityVector[j] != 0) + { + /*double division is probably more stable than multiplying two small numbers together + ** mutualInformation += state.jointProbabilityVector[i] * log(state.jointProbabilityVector[i] / (state.firstProbabilityVector[firstIndex] * state.secondProbabilityVector[secondIndex])); + */ + mutualInformation += state.jointProbabilityVector[j] * + log2(state.jointProbabilityVector[j] / state.firstProbabilityVector[firstVector[i]] / + state.secondProbabilityVector[secondVector[i]]); + state.jointProbabilityVector[j] = 0; + } + } + return mutualInformation; + } + double rankedMutualInformation(std::vector& data1, std::vector& data2) { OPENSWATH_PRECONDITION(data1.size() != 0 && data1.size() == data2.size(), "Both data vectors need to have the same length"); - unsigned int* arr_int_data1 = &data1[0]; - unsigned int* arr_int_data2 = &data2[0]; + jpstate state = calcJointProbability(data1, data2, data1.size()); - double result = calcMutualInformation(arr_int_data1, arr_int_data2, data1.size()); + double result = mutualInformation(state, data1, data2); return result; } From de64c00d9ca4e4c8f221a2856512c7d79464e77d Mon Sep 17 00:00:00 2001 From: Vincent Musch Date: Fri, 26 Nov 2021 10:25:02 +0100 Subject: [PATCH 09/10] .h definition --- .../include/OpenMS/OPENSWATHALGO/ALGO/Scoring.h | 2 ++ .../source/OPENSWATHALGO/ALGO/Scoring.cpp | 10 +++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/openswathalgo/include/OpenMS/OPENSWATHALGO/ALGO/Scoring.h b/src/openswathalgo/include/OpenMS/OPENSWATHALGO/ALGO/Scoring.h index 970e7a64740..d9741f790e1 100644 --- a/src/openswathalgo/include/OpenMS/OPENSWATHALGO/ALGO/Scoring.h +++ b/src/openswathalgo/include/OpenMS/OPENSWATHALGO/ALGO/Scoring.h @@ -150,6 +150,8 @@ namespace OpenSwath OPENSWATHALGO_DLLAPI double rankedMutualInformation(std::vector& data1, std::vector& data2); OPENSWATHALGO_DLLAPI unsigned int maxElem(const std::vector& arr); + OPENSWATHALGO_DLLAPI jpstate calcJointProbability(const std::vector& firstVector,const std::vector& secondVector,const int& vectorLength); + OPENSWATHALGO_DLLAPI double mutualInformation(jpstate& state,const std::vector& firstVector,const std::vector& secondVector); //@} diff --git a/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp b/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp index 2aa6eb7d96b..e60468535c3 100644 --- a/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp +++ b/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp @@ -307,7 +307,7 @@ namespace OpenSwath::Scoring - jpstate calcJointProbability(const std::vector& firstVector,const std::vector& secondVector,const int& vectorLength) + jpstate calcJointProbability(const std::vector& firstVector,const std::vector& secondVector,const int& vectorLength) { jpstate state; double length = 1.0 / vectorLength; @@ -355,7 +355,7 @@ namespace OpenSwath::Scoring return state; } - double mutualInformation(jpstate& state,const std::vector& firstVector,const std::vector& secondVector) + double mutualInformation(jpstate& state,const std::vector& firstVector,const std::vector& secondVector) { double mutualInformation = 0.0; //int firstIndex,secondIndex; @@ -388,7 +388,11 @@ namespace OpenSwath::Scoring jpstate state = calcJointProbability(data1, data2, data1.size()); double result = mutualInformation(state, data1, data2); - + /* + unsigned int* arr_int_data1 = &data1[0]; + unsigned int* arr_int_data2 = &data2[0]; + double result = calcMutualInformation(arr_int_data1, arr_int_data2, data1.size()); + */ return result; } } //namespace OpenMS // namespace Scoring From d4299dd2037a80437a1cda130aa089763bd202af Mon Sep 17 00:00:00 2001 From: Vincent Musch Date: Fri, 26 Nov 2021 14:30:21 +0100 Subject: [PATCH 10/10] tests --- .../source/OPENSWATHALGO/ALGO/Scoring.cpp | 1 + .../openswathalgo/Scoring_test.cpp | 87 +++++++++++++++++++ 2 files changed, 88 insertions(+) diff --git a/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp b/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp index e60468535c3..a850752b641 100644 --- a/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp +++ b/src/openswathalgo/source/OPENSWATHALGO/ALGO/Scoring.cpp @@ -281,6 +281,7 @@ namespace OpenSwath::Scoring std::iota(ranks.begin(), ranks.end(), 0); std::sort(ranks.begin(), ranks.end(), [&v_temp](unsigned int i, unsigned int j) { return v_temp[i] < v_temp[j]; }); + ranks_out.clear(); ranks_out.resize(v_temp.size()); double x = 0; unsigned int y = 0; diff --git a/src/tests/class_tests/openswathalgo/Scoring_test.cpp b/src/tests/class_tests/openswathalgo/Scoring_test.cpp index 37d73bebbaf..206c77cb10d 100644 --- a/src/tests/class_tests/openswathalgo/Scoring_test.cpp +++ b/src/tests/class_tests/openswathalgo/Scoring_test.cpp @@ -418,6 +418,93 @@ y = [5.97543668746948 4.2749171257019 3.3301842212677 4.08597040176392 5.5030703 } END_SECTION +BOOST_AUTO_TEST_CASE(test_maxElem) +{ + std::vector data1 = {0, 7, 2, 1, 5, 4, 6}; + std::vector data2 = {10, 35, 4, 2, 36, 12}; + + unsigned int result = Scoring::maxElem(data1); + TEST_EQUAL(result, 8); + result = Scoring::maxElem(data2); + TEST_EQUAL(result, 37); + +} +END_SECTION + +BOOST_AUTO_TEST_CASE(test_calcJointProbability) +{ + std::vector data1 {5.97543668746948, 4.2749171257019, 3.3301842212677, 4.08597040176392, 5.50307035446167, 5.24326848983765, + 8.40812492370605, 2.83419919013977, 6.94378805160522, 7.69957494735718, 4.08597040176392}; + + std::vector data2 {15.8951349258423, 41.5446395874023, 76.0746307373047, 109.069435119629, 111.90364074707, 169.79216003418, + 121.043930053711, 63.0136985778809, 44.6150207519531, 21.4926776885986, 7.93575811386108}; + std::vector ranks1; + std::vector ranks2; + Scoring::computeRank(data1, ranks1); + Scoring::computeRank(data2, ranks2); + + Scoring::jpstate result = Scoring::calcJointProbability(ranks1, ranks2, ranks1.size()); + + TEST_REAL_SIMILAR (result.firstProbabilityVector[0],0.09090909); + TEST_REAL_SIMILAR (result.firstProbabilityVector[1],0.09090909); + TEST_REAL_SIMILAR (result.firstProbabilityVector[2],0.18181818); + TEST_REAL_SIMILAR (result.firstProbabilityVector[3],0); + TEST_REAL_SIMILAR (result.firstProbabilityVector[4],0.09090909); + TEST_REAL_SIMILAR (result.firstProbabilityVector[5],0.09090909); + TEST_REAL_SIMILAR (result.firstProbabilityVector[6],0.09090909); + TEST_REAL_SIMILAR (result.firstProbabilityVector[7],0.09090909); + TEST_REAL_SIMILAR (result.firstProbabilityVector[8],0.09090909); + TEST_REAL_SIMILAR (result.firstProbabilityVector[9],0.09090909); + TEST_REAL_SIMILAR (result.firstProbabilityVector[10],0.09090909); + + + TEST_REAL_SIMILAR (result.secondProbabilityVector[0],0.09090909); + TEST_REAL_SIMILAR (result.secondProbabilityVector[1],0.09090909); + TEST_REAL_SIMILAR (result.secondProbabilityVector[2],0.09090909); + TEST_REAL_SIMILAR (result.secondProbabilityVector[3],0.09090909); + TEST_REAL_SIMILAR (result.secondProbabilityVector[4],0.09090909); + TEST_REAL_SIMILAR (result.secondProbabilityVector[5],0.09090909); + TEST_REAL_SIMILAR (result.secondProbabilityVector[6],0.09090909); + TEST_REAL_SIMILAR (result.secondProbabilityVector[7],0.09090909); + TEST_REAL_SIMILAR (result.secondProbabilityVector[8],0.09090909); + TEST_REAL_SIMILAR (result.secondProbabilityVector[9],0.09090909); + TEST_REAL_SIMILAR (result.secondProbabilityVector[10],0.09090909); + + TEST_EQUAL (result.jointPositionVector[0],18); + TEST_EQUAL (result.jointPositionVector[1],37); + TEST_EQUAL (result.jointPositionVector[2],67); + TEST_EQUAL (result.jointPositionVector[3],79); + TEST_EQUAL (result.jointPositionVector[4],94); + TEST_EQUAL (result.jointPositionVector[5],115); + TEST_EQUAL (result.jointPositionVector[6],109); + TEST_EQUAL (result.jointPositionVector[7],55); + TEST_EQUAL (result.jointPositionVector[8],52); + TEST_EQUAL (result.jointPositionVector[9],31); + TEST_EQUAL (result.jointPositionVector[10],2); + TEST_EQUAL (result.jointPositionVector[11],0); + +} +END_SECTION + +BOOST_AUTO_TEST_CASE(test_mutualInformation) +{ + std::vector data1 {5.97543668746948, 4.2749171257019, 3.3301842212677, 4.08597040176392, 5.50307035446167, 5.24326848983765, + 8.40812492370605, 2.83419919013977, 6.94378805160522, 7.69957494735718, 4.08597040176392}; + + std::vector data2 {15.8951349258423, 41.5446395874023, 76.0746307373047, 109.069435119629, 111.90364074707, 169.79216003418, + 121.043930053711, 63.0136985778809, 44.6150207519531, 21.4926776885986, 7.93575811386108}; + std::vector ranks1; + std::vector ranks2; + Scoring::computeRank(data1, ranks1); + Scoring::computeRank(data2, ranks2); + + Scoring::jpstate state = Scoring::calcJointProbability(ranks1, ranks2, ranks1.size()); + double result = Scoring::mutualInformation(state, ranks1, ranks2); + + TEST_REAL_SIMILAR (result, 3.2776); +} +END_SECTION + BOOST_AUTO_TEST_CASE(test_rankedMutualInformation) { /*