From 0d0de9fb8af26f5f2f087b4ac267fc556e4f1aa2 Mon Sep 17 00:00:00 2001 From: Simon Gene Gottlieb Date: Thu, 14 Sep 2023 10:03:09 +0200 Subject: [PATCH 1/3] patch: adjusting search schemes a lot --- src/example/main.cpp | 8 +-- src/fmindex-collection/CMakeLists.txt | 6 ++ src/run_search_schemes/main.cpp | 4 +- src/search_schemes/expand.h | 33 ++++++++++- src/search_schemes/generator/all.h | 81 ++++++++++++++++++++++----- src/search_schemes/isComplete.h | 1 + src/search_schemes/search_schemes.h | 16 ++++++ 7 files changed, 126 insertions(+), 23 deletions(-) create mode 100644 src/search_schemes/search_schemes.h diff --git a/src/example/main.cpp b/src/example/main.cpp index 59940a1a..edf303e3 100644 --- a/src/example/main.cpp +++ b/src/example/main.cpp @@ -126,9 +126,9 @@ int main(int argc, char const* const* argv) { throw std::runtime_error("unknown search scheme generetaror \"" + config.generator + "\""); } auto len = mut_queries[0].size(); - auto oss = iter->second(0, k, 0, 0); //!TODO last two parameters of second are not being used + auto oss = iter->second.generator(0, k, 0, 0); //!TODO last two parameters of second are not being used auto ess = search_schemes::expand(oss, len); - auto dss = search_schemes::expandDynamic(oss, len, 4, 3'000'000'000); //!TODO use correct Sigma and text size + auto dss = search_schemes::expandDynamicExpected(oss, len, 4, 3'000'000'000); //!TODO use correct Sigma and text size fmt::print("ss diff: {} to {}, using dyn: {}\n", search_schemes::expectedNodeCount(ess, 4, 3'000'000'000), search_schemes::expectedNodeCount(dss, 4, 3'000'000'000), config.generator_dyn); if (!config.generator_dyn) { return ess; @@ -145,9 +145,9 @@ int main(int argc, char const* const* argv) { throw std::runtime_error("unknown search scheme generetaror \"" + config.generator + "\""); } auto len = mut_queries[0].size(); - auto oss = iter->second(j, j, 0, 0); //!TODO last two parameters of second are not being used + auto oss = iter->second.generator(j, j, 0, 0); //!TODO last two parameters of second are not being used auto ess = search_schemes::expand(oss, len); - auto dss = search_schemes::expandDynamic(oss, len, 4, 3'000'000'000); //!TODO use correct Sigma and text size + auto dss = search_schemes::expandDynamicExpected(oss, len, 4, 3'000'000'000); //!TODO use correct Sigma and text size if (!config.generator_dyn) { return ess; } else { diff --git a/src/fmindex-collection/CMakeLists.txt b/src/fmindex-collection/CMakeLists.txt index 845776fb..67f575f1 100644 --- a/src/fmindex-collection/CMakeLists.txt +++ b/src/fmindex-collection/CMakeLists.txt @@ -1,3 +1,9 @@ +# ----------------------------------------------------------------------------------------------------- +# Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin +# Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik +# This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +# shipped with this file. +# ----------------------------------------------------------------------------------------------------- cmake_minimum_required (VERSION 3.8) project(fmindex-collection) diff --git a/src/run_search_schemes/main.cpp b/src/run_search_schemes/main.cpp index 607c040f..02c73e01 100644 --- a/src/run_search_schemes/main.cpp +++ b/src/run_search_schemes/main.cpp @@ -27,7 +27,7 @@ int main(int argc, char** argv) { auto K = std::stod(argv[2]); auto gen = argv[3]; { - auto oss = search_schemes::generator::all.at(gen)(0, K, 0, 0); + auto oss = search_schemes::generator::all.at(gen).generator(0, K, 0, 0); if (oss.size() == 0) return 0; // for (auto s : oss) { @@ -45,7 +45,7 @@ int main(int argc, char** argv) { auto nce = [&](auto ss) { return search_schemes::expectedNodeCountEdit(ss, 4, 3'000'000'000); }; - auto dss = search_schemes::expandDynamic(oss, len, 4, 3'000'000'000); + auto dss = search_schemes::expandDynamicExpected(oss, len, 4, 3'000'000'000); // fmt::print("ess:\n"); diff --git a/src/search_schemes/expand.h b/src/search_schemes/expand.h index 3207ab4c..3dfad1aa 100644 --- a/src/search_schemes/expand.h +++ b/src/search_schemes/expand.h @@ -8,6 +8,7 @@ #include "Scheme.h" #include "isValid.h" +#include "nodeCount.h" #include "expectedNodeCount.h" #include @@ -174,6 +175,7 @@ inline auto expand(Search s, std::vector parts) -> std::optional } return {r}; } + inline auto expand(Scheme ss, std::vector parts) -> Scheme { auto r = Scheme{}; for (auto const& s : ss) { @@ -185,7 +187,34 @@ inline auto expand(Scheme ss, std::vector parts) -> Scheme { return r; } -inline auto expandDynamic(Scheme ss, size_t _newLen, size_t sigma, size_t N) -> Scheme { +template +auto expandDynamic(Scheme ss, size_t _newLen, size_t sigma) -> Scheme { + if (ss.size() == 0) return {}; + auto additionalPos = _newLen - ss[0].pi.size(); + auto counts = std::vector(ss[0].pi.size(), 1); + + for (size_t i{0}; i::max(); + size_t bestPos = 0; + for (size_t j{0}; j < ss[0].pi.size(); ++j) { + counts[j] += 1; + auto ess = expand(ss, counts); + counts[j] -= 1; + auto f = Edit?nodeCountEdit(ess, sigma):nodeCount(ess, sigma); + if (f < bestVal) { + bestVal = f; + bestPos = j; + } + } + counts[bestPos] += 1; + } + + return expand(ss, counts); +} + +template +auto expandDynamicExpected(Scheme ss, size_t _newLen, size_t sigma, size_t N) -> Scheme { + if (ss.size() == 0) return {}; auto additionalPos = _newLen - ss[0].pi.size(); auto counts = std::vector(ss[0].pi.size(), 1); @@ -196,7 +225,7 @@ inline auto expandDynamic(Scheme ss, size_t _newLen, size_t sigma, size_t N) -> counts[j] += 1; auto ess = expand(ss, counts); counts[j] -= 1; - auto f = expectedNodeCountEdit(ess, sigma, N); + auto f = Edit?expectedNodeCountEdit(ess, sigma, N):expectedNodeCount(ess, sigma, N); if (f < bestVal) { bestVal = f; bestPos = j; diff --git a/src/search_schemes/generator/all.h b/src/search_schemes/generator/all.h index a432c0f3..f888c7fe 100644 --- a/src/search_schemes/generator/all.h +++ b/src/search_schemes/generator/all.h @@ -19,25 +19,76 @@ #include #include #include -#include #include +#include +#include namespace search_schemes::generator { -inline auto all = std::map>{ - { "backtracking", []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return backtracking(1, minError, maxError); }}, - { "optimum", []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return optimum(minError, maxError); }}, - { "01*0", []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return zeroOnesZero_trivial(minError, maxError); }}, - { "01*0_opt", []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return zeroOnesZero_opt(minError, maxError); }}, - { "pigeon", []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return pigeon_trivial(minError, maxError); }}, - { "pigeon_opt", []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return pigeon_opt(minError, maxError); }}, - { "suffix", []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return suffixFilter(maxError+1, minError, maxError); }}, - { "kianfar", []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return kianfar(maxError); }}, - { "kucherov-k1", []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return kucherov(maxError+1, maxError); }}, - { "kucherov-k2", []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return kucherov(maxError+2, maxError); }}, - { "h2-k1", []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return h2(maxError+1, minError, maxError); }}, - { "h2-k2", []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return h2(maxError+2, minError, maxError); }}, - { "h2-k3", []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return h2(maxError+3, minError, maxError); }}, +struct GeneratorEntry { + std::string name; + std::string description; + std::function generator; }; +inline auto all = []() { + auto res = std::map{}; + auto add = [&](GeneratorEntry entry) { + res.try_emplace(entry.name, entry); + }; + add({ .name = "backtracking", + .description = "simple backtracking, not utilisying the bidirectional fm-index or search schemes", + .generator = []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return backtracking(1, minError, maxError); } + }); + add({ .name = "optimum", + .description = "known optimim search schemes", + .generator = []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return optimum(minError, maxError); } + }); + add({ .name = "01*0", + .description = "based on 01*0 seeds", + .generator = []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return zeroOnesZero_trivial(minError, maxError); } + }); + add({ .name = "01*0_opt", + .description = "based on 01*0 seeds, but joining search schemes with same part order", + .generator = []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return zeroOnesZero_opt(minError, maxError); } + }); + add({ .name = "pigeon", + .description = "based on the pigeon hole principle", + .generator = []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return pigeon_trivial(minError, maxError); } + }); + add({ .name = "pigeon_opt", + .description = "based on the pigeon hole principle, removing duplicate paths", + .generator = []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return pigeon_opt(minError, maxError); } + }); + add({ .name = "suffix", + .description = "based on suffix filter", + .generator = []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return suffixFilter(maxError+1, minError, maxError); } + }); + add({ .name = "kianfar", + .description = "designed by kianfar (?)", + .generator = []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return kianfar(maxError); } + }); + add({ .name = "kucherov-k1", + .description = "designed by kucherov, divided into k+1 pieces (?)", + .generator = []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return kucherov(maxError+1, maxError); } + }); + add({ .name = "kucherov-k2", + .description = "designed by kucherov, divided into k+2 pieces (?)", + .generator = []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return kucherov(maxError+2, maxError); } + }); + add({ .name = "h2-k1", + .description = "designed by gottlieb, divided into k+1 pieces", + .generator = []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return h2(maxError+1, minError, maxError); } + }); + add({ .name = "h2-k2", + .description = "designed by gottlieb, divided into k+2 pieces", + .generator = []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return h2(maxError+2, minError, maxError); } + }); + add({ .name = "h2-k3", + .description = "designed by gottlieb, divided into k+3 pieces", + .generator = []([[maybe_unused]] int minError, [[maybe_unused]] int maxError, [[maybe_unused]] int sigma, [[maybe_unused]] int dbSize) { return h2(maxError+3, minError, maxError); } + }); + return res; +}(); + } diff --git a/src/search_schemes/isComplete.h b/src/search_schemes/isComplete.h index e0e4d660..415e65b6 100644 --- a/src/search_schemes/isComplete.h +++ b/src/search_schemes/isComplete.h @@ -70,6 +70,7 @@ void generateErrorConfig(CB cb, size_t len, size_t minK, size_t maxK) { * */ inline auto isComplete(Scheme const& ss, size_t minK, size_t maxK) -> bool { + if (ss.empty()) return false; bool complete{true}; auto len = ss.at(0).pi.size(); generateErrorConfig([&](ErrorConfig const& config) { diff --git a/src/search_schemes/search_schemes.h b/src/search_schemes/search_schemes.h new file mode 100644 index 00000000..df4f72ec --- /dev/null +++ b/src/search_schemes/search_schemes.h @@ -0,0 +1,16 @@ +// ----------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file. +// ----------------------------------------------------------------------------------------------------- +#pragma once + +#include "Scheme.h" +#include "Search.h" +#include "expand.h" +#include "expectedNodeCount.h" +#include "generator/all.h" +#include "isComplete.h" +#include "isValid.h" +#include "nodeCount.h" From 5cd56f4c2f77eee388d2b7f7962329f077825b10 Mon Sep 17 00:00:00 2001 From: Simon Gene Gottlieb Date: Thu, 14 Sep 2023 10:20:04 +0200 Subject: [PATCH 2/3] patch: merge nodeCount and nodeCountEdit, same with expectedNodeCount --- src/example/main.cpp | 2 +- src/run_search_schemes/main.cpp | 4 +- src/search_schemes/expand.h | 4 +- src/search_schemes/expectedNodeCount.h | 60 ++++++++------------------ src/search_schemes/nodeCount.h | 54 +++++++---------------- src/test_search_schemes/nodeCount.cpp | 12 +++--- 6 files changed, 44 insertions(+), 92 deletions(-) diff --git a/src/example/main.cpp b/src/example/main.cpp index edf303e3..e5fde3fc 100644 --- a/src/example/main.cpp +++ b/src/example/main.cpp @@ -129,7 +129,7 @@ int main(int argc, char const* const* argv) { auto oss = iter->second.generator(0, k, 0, 0); //!TODO last two parameters of second are not being used auto ess = search_schemes::expand(oss, len); auto dss = search_schemes::expandDynamicExpected(oss, len, 4, 3'000'000'000); //!TODO use correct Sigma and text size - fmt::print("ss diff: {} to {}, using dyn: {}\n", search_schemes::expectedNodeCount(ess, 4, 3'000'000'000), search_schemes::expectedNodeCount(dss, 4, 3'000'000'000), config.generator_dyn); + fmt::print("ss diff: {} to {}, using dyn: {}\n", search_schemes::expectedNodeCount(ess, 4, 3'000'000'000), search_schemes::expectedNodeCount(dss, 4, 3'000'000'000), config.generator_dyn); if (!config.generator_dyn) { return ess; } else { diff --git a/src/run_search_schemes/main.cpp b/src/run_search_schemes/main.cpp index 02c73e01..1deca93d 100644 --- a/src/run_search_schemes/main.cpp +++ b/src/run_search_schemes/main.cpp @@ -40,10 +40,10 @@ int main(int argc, char** argv) { auto ss = search_schemes::expand(oss, len); auto nc = [&](auto ss) { - return search_schemes::expectedNodeCount(ss, 4, 3'000'000'000); + return search_schemes::expectedNodeCount(ss, 4, 3'000'000'000); }; auto nce = [&](auto ss) { - return search_schemes::expectedNodeCountEdit(ss, 4, 3'000'000'000); + return search_schemes::expectedNodeCount(ss, 4, 3'000'000'000); }; auto dss = search_schemes::expandDynamicExpected(oss, len, 4, 3'000'000'000); diff --git a/src/search_schemes/expand.h b/src/search_schemes/expand.h index 3dfad1aa..042fe8b1 100644 --- a/src/search_schemes/expand.h +++ b/src/search_schemes/expand.h @@ -200,7 +200,7 @@ auto expandDynamic(Scheme ss, size_t _newLen, size_t sigma) -> Scheme { counts[j] += 1; auto ess = expand(ss, counts); counts[j] -= 1; - auto f = Edit?nodeCountEdit(ess, sigma):nodeCount(ess, sigma); + auto f = nodeCount(ess, sigma); if (f < bestVal) { bestVal = f; bestPos = j; @@ -225,7 +225,7 @@ auto expandDynamicExpected(Scheme ss, size_t _newLen, size_t sigma, size_t N) -> counts[j] += 1; auto ess = expand(ss, counts); counts[j] -= 1; - auto f = Edit?expectedNodeCountEdit(ess, sigma, N):expectedNodeCount(ess, sigma, N); + auto f = expectedNodeCount(ess, sigma, N); if (f < bestVal) { bestVal = f; bestPos = j; diff --git a/src/search_schemes/expectedNodeCount.h b/src/search_schemes/expectedNodeCount.h index 5b024797..c78c4880 100644 --- a/src/search_schemes/expectedNodeCount.h +++ b/src/search_schemes/expectedNodeCount.h @@ -16,11 +16,13 @@ namespace search_schemes { /** + * \tparam Edit use edit distance, other wise Hamming distance * \param s search scheme * \param sigma size of the alphabet (without delimiter) * \param N size of the reference text, ~3'000'000'000 for hg */ -inline long double expectedNodeCount(Search s, size_t sigma, size_t N) { +template +long double expectedNodeCount(Search s, size_t sigma, size_t N) { auto n_max = s.pi.size(); auto e = *std::max_element(begin(s.u), end(s.u)); @@ -38,7 +40,11 @@ inline long double expectedNodeCount(Search s, size_t sigma, size_t N) { if (s.l[n-1] <= i and i <= s.u[n-1]) { newArray[i] = lastArray[i]; if (i > 0) { - newArray[i] += (sigma-1) * lastArray[i-1]; + if constexpr (Edit) { + newArray[i] += (sigma-1) * lastArray[i-1] + (sigma) * lastArray[i-1] + lastArray[i-1]; + } else { + newArray[i] += (sigma-1) * lastArray[i-1]; + } } newArray[i] *= f; acc += newArray[i]; @@ -52,48 +58,16 @@ inline long double expectedNodeCount(Search s, size_t sigma, size_t N) { return acc; } -inline long double expectedNodeCount(Scheme const& ss, size_t sigma, size_t N) { - return std::accumulate(begin(ss), end(ss), static_cast(0.), [&](long double v, auto const& s) { - return v + expectedNodeCount(s, sigma, N); - }); -} - -inline long double expectedNodeCountEdit(Search s, size_t sigma, size_t N) { - auto n_max = s.pi.size(); - auto e = *std::max_element(begin(s.u), end(s.u)); - - auto lastArray = std::vector(e+1, 0); - lastArray[0] = 1; - - long double acc = 0; - - auto newArray = std::vector(e+1, 0); - for (size_t n {1}; n <= n_max; ++n) { - auto f = N / std::pow(sigma, n); - if (f > 1) f = 1.; - - for (size_t i{0}; i < e+1; ++i) { - if (s.l[n-1] <= i and i <= s.u[n-1]) { - - newArray[i] = lastArray[i]; - if (i > 0) { - newArray[i] += (sigma-1) * lastArray[i-1] + (sigma) * lastArray[i-1] + lastArray[i-1]; - } - newArray[i] *= f; - acc += newArray[i]; - } else { - newArray[i] = 0; - } - } - std::swap(newArray, lastArray); - } - return acc; -} - -inline long double expectedNodeCountEdit(Scheme const& ss, size_t sigma, size_t N) { +/** + * \tparam Edit use edit distance, other wise Hamming distance + * \param ss search schemes + * \param sigma size of the alphabet (without delimiter) + * \param N size of the reference text, ~3'000'000'000 for hg + */ +template +long double expectedNodeCount(Scheme const& ss, size_t sigma, size_t N) { return std::accumulate(begin(ss), end(ss), static_cast(0.), [&](long double v, auto const& s) { - return v + expectedNodeCountEdit(s, sigma, N); + return v + expectedNodeCount(s, sigma, N); }); } - } diff --git a/src/search_schemes/nodeCount.h b/src/search_schemes/nodeCount.h index 037c2a37..d42e160f 100644 --- a/src/search_schemes/nodeCount.h +++ b/src/search_schemes/nodeCount.h @@ -14,7 +14,11 @@ namespace search_schemes { -inline long double nodeCount(Search s, size_t sigma) { +/** + * If its not edit distance, its Hamming distance + */ +template +long double nodeCount(Search s, size_t sigma) { auto n_max = s.pi.size(); auto e = *std::max_element(begin(s.u), end(s.u)); @@ -29,7 +33,11 @@ inline long double nodeCount(Search s, size_t sigma) { if (s.l[n-1] <= i and i <= s.u[n-1]) { newArray[i] = lastArray[i]; if (i > 0) { - newArray[i] += (sigma-1) * lastArray[i-1]; + if constexpr (Edit) { + newArray[i] += (sigma-1) * lastArray[i-1] + (sigma) * lastArray[i-1] + lastArray[i-1]; + } else { + newArray[i] += (sigma-1) * lastArray[i-1]; + } } acc += newArray[i]; } else { @@ -41,43 +49,13 @@ inline long double nodeCount(Search s, size_t sigma) { return acc; } -inline long double nodeCount(Scheme const& ss, size_t sigma) { +/** + * If its not edit distance, its Hamming distance + */ +template +long double nodeCount(Scheme const& ss, size_t sigma) { return std::accumulate(begin(ss), end(ss), static_cast(0.), [&](long double v, auto const& s) { - return v + nodeCount(s, sigma); + return v + nodeCount(s, sigma); }); } - -inline long double nodeCountEdit(Search s, size_t sigma) { - auto n_max = s.pi.size(); - auto e = *std::max_element(begin(s.u), end(s.u)); - - auto lastArray = std::vector(e+1, 0); - lastArray[0] = 1; - - long double acc = 0; - - auto newArray = std::vector(e+1, 0); - for (size_t n {1}; n <= n_max; ++n) { - for (size_t i{0}; i < e+1; ++i) { - if (s.l[n-1] <= i and i <= s.u[n-1]) { - newArray[i] = lastArray[i]; - if (i > 0) { - newArray[i] += (sigma-1) * lastArray[i-1] + (sigma) * lastArray[i-1] + lastArray[i-1]; - } - acc += newArray[i]; - } else { - newArray[i] = 0; - } - } - std::swap(newArray, lastArray); - } - return acc; -} - -inline long double nodeCountEdit(Scheme const& ss, int sigma) { - return std::accumulate(begin(ss), end(ss), static_cast(0.), [&](long double v, auto const& s) { - return v + nodeCountEdit(s, sigma); - }); -} - } diff --git a/src/test_search_schemes/nodeCount.cpp b/src/test_search_schemes/nodeCount.cpp index 891a950f..26e79c65 100644 --- a/src/test_search_schemes/nodeCount.cpp +++ b/src/test_search_schemes/nodeCount.cpp @@ -16,20 +16,20 @@ TEST_CASE("check node counts", "[nodeCount]") { SECTION("known length of search schemes with 0 errors") { for (size_t n{1}; n < 1000; ++n) { INFO("length(n): " << n); - CHECK(n == ss::nodeCount(gen::backtracking(n, 0, 0), 4)); + CHECK(n == ss::nodeCount(gen::backtracking(n, 0, 0), 4)); } for (size_t n{1}; n < 1000; ++n) { INFO("length(n): " << n); - CHECK(n == ss::nodeCount(ss::expand(gen::backtracking(1, 0, 0), n), 4)); + CHECK(n == ss::nodeCount(ss::expand(gen::backtracking(1, 0, 0), n), 4)); } } SECTION("known length of some search schemes") { - CHECK( 4 == ss::nodeCount(gen::backtracking(1, 0, 1), 4)); - CHECK(11 == ss::nodeCount(gen::backtracking(2, 0, 1), 4)); - CHECK(21 == ss::nodeCount(gen::backtracking(3, 0, 1), 4)); - CHECK(20 == ss::nodeCount(gen::backtracking(2, 0, 2), 4)); + CHECK( 4 == ss::nodeCount(gen::backtracking(1, 0, 1), 4)); + CHECK(11 == ss::nodeCount(gen::backtracking(2, 0, 1), 4)); + CHECK(21 == ss::nodeCount(gen::backtracking(3, 0, 1), 4)); + CHECK(20 == ss::nodeCount(gen::backtracking(2, 0, 2), 4)); } From 75712d65a704a55326e3f597105de2b1d83ced64 Mon Sep 17 00:00:00 2001 From: Simon Gene Gottlieb Date: Thu, 14 Sep 2023 10:29:33 +0200 Subject: [PATCH 3/3] patch: renaming expectedNodeCount to weightedNodeCount (and all related functions) --- src/example/main.cpp | 6 +++--- src/run_search_schemes/main.cpp | 12 ++++-------- src/search_schemes/expand.h | 8 ++++---- src/search_schemes/search_schemes.h | 2 +- .../{expectedNodeCount.h => weightedNodeCount.h} | 6 +++--- 5 files changed, 15 insertions(+), 19 deletions(-) rename src/search_schemes/{expectedNodeCount.h => weightedNodeCount.h} (92%) diff --git a/src/example/main.cpp b/src/example/main.cpp index e5fde3fc..cd8cfe4b 100644 --- a/src/example/main.cpp +++ b/src/example/main.cpp @@ -128,8 +128,8 @@ int main(int argc, char const* const* argv) { auto len = mut_queries[0].size(); auto oss = iter->second.generator(0, k, 0, 0); //!TODO last two parameters of second are not being used auto ess = search_schemes::expand(oss, len); - auto dss = search_schemes::expandDynamicExpected(oss, len, 4, 3'000'000'000); //!TODO use correct Sigma and text size - fmt::print("ss diff: {} to {}, using dyn: {}\n", search_schemes::expectedNodeCount(ess, 4, 3'000'000'000), search_schemes::expectedNodeCount(dss, 4, 3'000'000'000), config.generator_dyn); + auto dss = search_schemes::expandByWNC(oss, len, 4, 3'000'000'000); //!TODO use correct Sigma and text size + fmt::print("ss diff: {} to {}, using dyn: {}\n", search_schemes::weightedNodeCount(ess, 4, 3'000'000'000), search_schemes::weightedNodeCount(dss, 4, 3'000'000'000), config.generator_dyn); if (!config.generator_dyn) { return ess; } else { @@ -147,7 +147,7 @@ int main(int argc, char const* const* argv) { auto len = mut_queries[0].size(); auto oss = iter->second.generator(j, j, 0, 0); //!TODO last two parameters of second are not being used auto ess = search_schemes::expand(oss, len); - auto dss = search_schemes::expandDynamicExpected(oss, len, 4, 3'000'000'000); //!TODO use correct Sigma and text size + auto dss = search_schemes::expandByWNC(oss, len, 4, 3'000'000'000); //!TODO use correct Sigma and text size if (!config.generator_dyn) { return ess; } else { diff --git a/src/run_search_schemes/main.cpp b/src/run_search_schemes/main.cpp index 1deca93d..280b48a6 100644 --- a/src/run_search_schemes/main.cpp +++ b/src/run_search_schemes/main.cpp @@ -5,11 +5,7 @@ // shipped with this file. // ----------------------------------------------------------------------------------------------------- #include -#include -#include -#include -#include -#include +#include int main(int argc, char** argv) { if (argc != 4) { @@ -40,12 +36,12 @@ int main(int argc, char** argv) { auto ss = search_schemes::expand(oss, len); auto nc = [&](auto ss) { - return search_schemes::expectedNodeCount(ss, 4, 3'000'000'000); + return search_schemes::weightedNodeCount(ss, 4, 3'000'000'000); }; auto nce = [&](auto ss) { - return search_schemes::expectedNodeCount(ss, 4, 3'000'000'000); + return search_schemes::weightedNodeCount(ss, 4, 3'000'000'000); }; - auto dss = search_schemes::expandDynamicExpected(oss, len, 4, 3'000'000'000); + auto dss = search_schemes::expandByWNC(oss, len, 4, 3'000'000'000); // fmt::print("ess:\n"); diff --git a/src/search_schemes/expand.h b/src/search_schemes/expand.h index 042fe8b1..ecc16e12 100644 --- a/src/search_schemes/expand.h +++ b/src/search_schemes/expand.h @@ -9,7 +9,7 @@ #include "Scheme.h" #include "isValid.h" #include "nodeCount.h" -#include "expectedNodeCount.h" +#include "weightedNodeCount.h" #include #include @@ -188,7 +188,7 @@ inline auto expand(Scheme ss, std::vector parts) -> Scheme { } template -auto expandDynamic(Scheme ss, size_t _newLen, size_t sigma) -> Scheme { +auto expandByNC(Scheme ss, size_t _newLen, size_t sigma) -> Scheme { if (ss.size() == 0) return {}; auto additionalPos = _newLen - ss[0].pi.size(); auto counts = std::vector(ss[0].pi.size(), 1); @@ -213,7 +213,7 @@ auto expandDynamic(Scheme ss, size_t _newLen, size_t sigma) -> Scheme { } template -auto expandDynamicExpected(Scheme ss, size_t _newLen, size_t sigma, size_t N) -> Scheme { +auto expandByWNC(Scheme ss, size_t _newLen, size_t sigma, size_t N) -> Scheme { if (ss.size() == 0) return {}; auto additionalPos = _newLen - ss[0].pi.size(); auto counts = std::vector(ss[0].pi.size(), 1); @@ -225,7 +225,7 @@ auto expandDynamicExpected(Scheme ss, size_t _newLen, size_t sigma, size_t N) -> counts[j] += 1; auto ess = expand(ss, counts); counts[j] -= 1; - auto f = expectedNodeCount(ess, sigma, N); + auto f = weightedNodeCount(ess, sigma, N); if (f < bestVal) { bestVal = f; bestPos = j; diff --git a/src/search_schemes/search_schemes.h b/src/search_schemes/search_schemes.h index df4f72ec..61b84a49 100644 --- a/src/search_schemes/search_schemes.h +++ b/src/search_schemes/search_schemes.h @@ -9,8 +9,8 @@ #include "Scheme.h" #include "Search.h" #include "expand.h" -#include "expectedNodeCount.h" #include "generator/all.h" #include "isComplete.h" #include "isValid.h" #include "nodeCount.h" +#include "weightedNodeCount.h" diff --git a/src/search_schemes/expectedNodeCount.h b/src/search_schemes/weightedNodeCount.h similarity index 92% rename from src/search_schemes/expectedNodeCount.h rename to src/search_schemes/weightedNodeCount.h index c78c4880..71e6786b 100644 --- a/src/search_schemes/expectedNodeCount.h +++ b/src/search_schemes/weightedNodeCount.h @@ -22,7 +22,7 @@ namespace search_schemes { * \param N size of the reference text, ~3'000'000'000 for hg */ template -long double expectedNodeCount(Search s, size_t sigma, size_t N) { +long double weightedNodeCount(Search s, size_t sigma, size_t N) { auto n_max = s.pi.size(); auto e = *std::max_element(begin(s.u), end(s.u)); @@ -65,9 +65,9 @@ long double expectedNodeCount(Search s, size_t sigma, size_t N) { * \param N size of the reference text, ~3'000'000'000 for hg */ template -long double expectedNodeCount(Scheme const& ss, size_t sigma, size_t N) { +long double weightedNodeCount(Scheme const& ss, size_t sigma, size_t N) { return std::accumulate(begin(ss), end(ss), static_cast(0.), [&](long double v, auto const& s) { - return v + expectedNodeCount(s, sigma, N); + return v + weightedNodeCount(s, sigma, N); }); } }