Skip to content

Commit

Permalink
fix: add missing functions
Browse files Browse the repository at this point in the history
  • Loading branch information
SGSSGene committed Jun 30, 2023
1 parent 359d951 commit 4d57765
Showing 1 changed file with 69 additions and 0 deletions.
69 changes: 69 additions & 0 deletions src/fmindex-collection/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,4 +170,73 @@ auto createSequences_32(Sequences auto const& _input, int samplingRate, bool rev
}


/**
 * Builds the suffix array of a text of 32-bit symbols via libsais.
 *
 * @param input     the text; an empty text yields an empty suffix array.
 * @param threadNbr number of threads handed to libsais when LIBSAIS_OPENMP is
 *                  set; ignored otherwise.
 * @return the suffix array, one int32_t entry per symbol of `input`.
 * @throws std::runtime_error if the text has more symbols than an int32_t can
 *         index, or if libsais returns a non-zero status.
 */
inline auto createSA_32(std::span<uint32_t const> input, size_t threadNbr) -> std::vector<int32_t> {
    // libsais takes the length as int32_t; reject anything that would be truncated.
    if (input.size() > static_cast<size_t>(INT32_MAX)) {
        throw std::runtime_error("input is too large to construct a 32 bit SA");
    }

    auto sa = std::vector<int32_t>(input.size());
    if (input.empty()) {
        return sa;
    }

    auto const n = static_cast<int32_t>(input.size());

    // libsais takes a non-const int32_t pointer, so constness and signedness are
    // cast away explicitly here.
    // NOTE(review): libsais documents that it may temporarily modify the text
    // during construction — confirm callers never pass read-only storage.
    auto* text = reinterpret_cast<int32_t*>(const_cast<uint32_t*>(input.data()));

#if LIBSAIS_OPENMP
    // Argument order is (T, SA, n, k, fs, threads); k and fs mirror the
    // single-threaded call below.
    auto r = libsais_int_omp(text, sa.data(), n, 65536, 0, static_cast<int32_t>(threadNbr));
#else
    (void)threadNbr; // Unused if no openmp is available
    auto r = libsais_int(text, sa.data(), n, 65536, 0);
#endif

    if (r != 0) { throw std::runtime_error("something went wrong constructing the SA"); }
    return sa;
}


/**
 * Derives the BWT of `input` from its suffix array.
 *
 * For every suffix array entry the symbol cyclically preceding that text
 * position is emitted, so position 0 maps to the last symbol of the text.
 *
 * @param input the text.
 * @param sa    suffix array of `input`; must have the same length.
 * @return the BWT, with as many symbols as `input`.
 */
inline auto createBWT_32(std::span<uint32_t const> input, std::span<int32_t const> sa) -> std::vector<uint32_t> {
    assert(input.size() == sa.size());

    auto const textLen = input.size();
    auto bwt = std::vector<uint32_t>(textLen);

    auto out = bwt.begin();
    for (auto const suffixStart : sa) {
        // adding textLen before subtracting keeps the index non-negative for suffixStart == 0
        *out++ = input[(suffixStart + textLen - 1) % textLen];
    }
    return bwt;
}

/**
 * Concatenates all sequences of `_input` into one text of 32-bit symbols.
 *
 * Each sequence is followed by a run of delimiter symbols (value 0). The run is
 * at least one symbol long and pads the sequence up to the next multiple of
 * `samplingRate`, so every sequence starts at a multiple of `samplingRate`.
 *
 * @param _input       the sequences to concatenate.
 * @param samplingRate alignment of the sequence starts; must be positive.
 * @param reverse      if true, each sequence is written back to front (the
 *                     order of the sequences themselves is unchanged).
 * @return a tuple of (total text length including delimiters, the concatenated
 *         text, one (sequence length, delimiter count) entry per sequence).
 * @throws std::invalid_argument if `samplingRate` is not positive.
 */
auto createSequences_32(Sequences auto const& _input, int samplingRate, bool reverse=false) -> std::tuple<size_t, std::vector<uint32_t>, std::vector<std::tuple<size_t, size_t>>> {
    // A rate of zero would divide by zero below, a negative one would wrap around.
    if (samplingRate <= 0) {
        throw std::invalid_argument("samplingRate must be positive");
    }
    auto const rate = static_cast<size_t>(samplingRate);

    // number of delimiters ('$') which need to be added. It must be at least one, and it
    // has to make sure the text will be a multiple of samplingRate
    auto const delimitersFor = [rate](size_t textLen) -> size_t {
        return rate - textLen % rate;
    };

    // The result is assembled in place inside the returned tuple, so the
    // (potentially huge) vectors are never copied on return.
    auto result = std::tuple<size_t, std::vector<uint32_t>, std::vector<std::tuple<size_t, size_t>>>{};
    auto& [totalSize, inputText, inputSizes] = result;

    // compute total numbers of symbols of the text including delimiters "$"
    for (auto const& l : _input) {
        size_t const textLen = l.size();
        totalSize += textLen + delimitersFor(textLen);
    }

    inputText.reserve(totalSize);
    inputSizes.reserve(_input.size());

    for (auto const& l : _input) {
        size_t const ls = l.size();
        size_t const delimCount = delimitersFor(ls);

        // grow by the sequence plus its delimiters; the new symbols start out as 0,
        // so only the sequence itself has to be written
        auto const offset = inputText.size();
        inputText.resize(offset + ls + delimCount, 0);
        auto const dest = inputText.begin() + offset;

        if (not reverse) {
            std::ranges::copy(l, dest);
        } else {
            // a range algorithm instead of std::views::reverse: no compiler specific
            // workaround and no temporary copy of the sequence is needed
            std::ranges::reverse_copy(l, dest);
        }

        inputSizes.emplace_back(ls, delimCount);
    }
    return result;
}
}

0 comments on commit 4d57765

Please sign in to comment.