1#ifndef BTLLIB_BLOOM_FILTER_HPP
-
2#define BTLLIB_BLOOM_FILTER_HPP
-
-
4#include "btllib/nthash.hpp"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
18static const uint8_t BIT_MASKS[CHAR_BIT] = {
-
-
20 0x01, 0x02, 0x04, 0x08,
-
21 0x10, 0x20, 0x40, 0x80
-
-
-
24static const char*
const BLOOM_FILTER_SIGNATURE =
"[BTLBloomFilter_v6]";
-
25static const char*
const KMER_BLOOM_FILTER_SIGNATURE =
-
26 "[BTLKmerBloomFilter_v6]";
-
27static const char*
const SEED_BLOOM_FILTER_SIGNATURE =
-
28 "[BTLSeedBloomFilter_v6]";
-
29static const char*
const HASH_FN = NTHASH_FN_NAME;
-
-
31static const unsigned MAX_HASH_VALUES = 1024;
-
32static const unsigned PLACEHOLDER_NEWLINES = 50;
-
-
35class BloomFilterInitializer
-
-
-
-
39 BloomFilterInitializer(
const std::string& path,
const std::string& signature)
-
-
-
42 , table(parse_header(signature))
-
-
-
-
46 static bool check_file_signature(std::ifstream& ifs,
-
47 const std::string& expected_signature,
-
48 std::string& file_signature);
-
-
-
-
52 std::shared_ptr<cpptoml::table> table;
-
-
54 BloomFilterInitializer(
const BloomFilterInitializer&) =
delete;
-
55 BloomFilterInitializer(BloomFilterInitializer&&) =
default;
-
-
57 BloomFilterInitializer& operator=(
const BloomFilterInitializer&) =
delete;
-
58 BloomFilterInitializer& operator=(BloomFilterInitializer&&) =
default;
-
-
-
63 std::shared_ptr<cpptoml::table> parse_header(
const std::string& signature);
-
-
-
-
-
-
-
-
-
81 BloomFilter(
size_t bytes,
unsigned hash_num, std::string hash_fn =
"");
-
-
-
-
-
-
-
-
-
-
-
-
109 void insert(
const std::vector<uint64_t>& hashes) {
insert(hashes.data()); }
-
-
-
-
128 bool contains(
const std::vector<uint64_t>& hashes)
const
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
173 void save(
const std::string& path);
-
-
175 static void save(
const std::string& path,
-
176 const cpptoml::table& table,
-
-
-
-
-
-
187 return check_file_signature(path, BLOOM_FILTER_SIGNATURE);
-
-
-
190 static bool check_file_signature(
const std::string& path,
-
191 const std::string& signature);
-
-
-
194 BloomFilter(
const std::shared_ptr<BloomFilterInitializer>& bfi);
-
-
-
-
-
-
-
-
202 size_t array_bits = 0;
-
203 unsigned hash_num = 0;
-
-
205 std::unique_ptr<std::atomic<uint8_t>[]> array;
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
246 void insert(
const char* seq,
size_t seq_len);
-
-
253 void insert(
const std::string& seq) {
insert(seq.c_str(), seq.size()); }
-
-
-
-
268 void insert(
const std::vector<uint64_t>& hashes)
-
-
270 bloom_filter.
insert(hashes);
-
-
-
281 unsigned contains(
const char* seq,
size_t seq_len)
const;
-
-
-
-
292 return contains(seq.c_str(), seq.size());
-
-
-
-
-
303 return bloom_filter.
contains(hashes);
-
-
-
311 bool contains(
const std::vector<uint64_t>& hashes)
const
-
-
313 return bloom_filter.
contains(hashes);
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
374 unsigned get_k()
const {
return k; }
-
-
-
-
385 void save(
const std::string& path);
-
-
-
-
394 return btllib::BloomFilter::check_file_signature(
-
395 path, KMER_BLOOM_FILTER_SIGNATURE);
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
427 const std::vector<std::string>& seeds,
-
428 unsigned hash_num_per_seed);
-
-
-
-
-
-
-
-
-
-
449 void insert(
const char* seq,
size_t seq_len);
-
-
456 void insert(
const std::string& seq) {
insert(seq.c_str(), seq.size()); }
-
-
464 void insert(
const uint64_t* hashes) { kmer_bloom_filter.
insert(hashes); }
-
-
471 void insert(
const std::vector<uint64_t>& hashes)
-
-
473 kmer_bloom_filter.
insert(hashes);
-
-
-
486 std::vector<std::vector<unsigned>>
contains(
const char* seq,
-
487 size_t seq_len)
const;
-
-
498 std::vector<std::vector<unsigned>>
contains(
const std::string& seq)
const
-
-
500 return contains(seq.c_str(), seq.size());
-
-
-
-
-
512 return kmer_bloom_filter.
contains(hashes);
-
-
-
521 bool contains(
const std::vector<uint64_t>& hashes)
const
-
-
523 return kmer_bloom_filter.
contains(hashes);
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
600 const std::vector<std::string>&
get_seeds()
const {
return seeds; }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
627 void save(
const std::string& path);
-
-
-
-
636 return btllib::BloomFilter::check_file_signature(
-
637 path, SEED_BLOOM_FILTER_SIGNATURE);
-
-
-
-
-
-
643 std::vector<std::string> seeds;
-
644 std::vector<SpacedSeed> parsed_seeds;
-
-
-
-
-
-
-
Definition bloom_filter.hpp:68
-
bool contains(const uint64_t *hashes) const
-
bool contains(const std::vector< uint64_t > &hashes) const
Definition bloom_filter.hpp:128
-
void insert(const std::vector< uint64_t > &hashes)
Definition bloom_filter.hpp:109
-
void insert(const uint64_t *hashes)
-
static bool is_bloom_file(const std::string &path)
Definition bloom_filter.hpp:185
-
-
const std::string & get_hash_fn() const
Definition bloom_filter.hpp:166
-
unsigned get_hash_num() const
Definition bloom_filter.hpp:162
-
BloomFilter(size_t bytes, unsigned hash_num, std::string hash_fn="")
-
void save(const std::string &path)
-
size_t get_bytes() const
Definition bloom_filter.hpp:156
-
double get_occupancy() const
-
bool contains_insert(const uint64_t *hashes)
-
BloomFilter()
Definition bloom_filter.hpp:72
-
bool contains_insert(const std::vector< uint64_t > &hashes)
Definition bloom_filter.hpp:150
-
uint64_t get_pop_cnt() const
-
BloomFilter(const std::string &path)
-
Definition bloom_filter.hpp:212
-
void insert(const char *seq, size_t seq_len)
-
unsigned contains_insert(const char *seq, size_t seq_len)
-
double get_fpr() const
Definition bloom_filter.hpp:372
-
BloomFilter & get_bloom_filter()
Definition bloom_filter.hpp:378
-
void insert(const std::vector< uint64_t > &hashes)
Definition bloom_filter.hpp:268
-
void insert(const std::string &seq)
Definition bloom_filter.hpp:253
-
unsigned get_hash_num() const
Definition bloom_filter.hpp:370
-
static bool is_bloom_file(const std::string &path)
Definition bloom_filter.hpp:392
-
unsigned contains(const char *seq, size_t seq_len) const
-
unsigned contains_insert(const std::string &seq)
Definition bloom_filter.hpp:333
-
KmerBloomFilter(const std::string &path)
-
uint64_t get_pop_cnt() const
Definition bloom_filter.hpp:366
-
bool contains_insert(const uint64_t *hashes)
Definition bloom_filter.hpp:346
-
KmerBloomFilter()
Definition bloom_filter.hpp:216
-
bool contains(const uint64_t *hashes) const
Definition bloom_filter.hpp:301
-
void insert(const uint64_t *hashes)
Definition bloom_filter.hpp:261
-
const std::string & get_hash_fn() const
Definition bloom_filter.hpp:376
-
void save(const std::string &path)
-
bool contains_insert(const std::vector< uint64_t > &hashes)
Definition bloom_filter.hpp:358
-
size_t get_bytes() const
Definition bloom_filter.hpp:364
-
double get_occupancy() const
Definition bloom_filter.hpp:368
-
unsigned contains(const std::string &seq) const
Definition bloom_filter.hpp:290
-
unsigned get_k() const
Definition bloom_filter.hpp:374
-
bool contains(const std::vector< uint64_t > &hashes) const
Definition bloom_filter.hpp:311
-
KmerBloomFilter(size_t bytes, unsigned hash_num, unsigned k)
-
Definition bloom_filter.hpp:411
-
unsigned get_total_hash_num() const
Definition bloom_filter.hpp:590
-
double get_occupancy() const
Definition bloom_filter.hpp:587
-
bool contains(const uint64_t *hashes) const
Definition bloom_filter.hpp:510
-
std::vector< std::vector< unsigned > > contains_insert(const std::string &seq)
Definition bloom_filter.hpp:550
-
void insert(const char *seq, size_t seq_len)
-
bool contains(const std::vector< uint64_t > &hashes) const
Definition bloom_filter.hpp:521
-
std::vector< std::vector< unsigned > > contains_insert(const char *seq, size_t seq_len)
-
void save(const std::string &path)
-
SeedBloomFilter(size_t bytes, unsigned k, const std::vector< std::string > &seeds, unsigned hash_num_per_seed)
-
const std::vector< SpacedSeed > & get_parsed_seeds() const
Definition bloom_filter.hpp:603
-
KmerBloomFilter & get_kmer_bloom_filter()
Definition bloom_filter.hpp:620
-
void insert(const std::vector< uint64_t > &hashes)
Definition bloom_filter.hpp:471
-
bool contains_insert(const std::vector< uint64_t > &hashes)
Definition bloom_filter.hpp:577
-
static bool is_bloom_file(const std::string &path)
Definition bloom_filter.hpp:634
-
unsigned get_hash_num_per_seed() const
Definition bloom_filter.hpp:608
-
SeedBloomFilter(const std::string &path)
-
-
uint64_t get_pop_cnt() const
Definition bloom_filter.hpp:585
-
size_t get_bytes() const
Definition bloom_filter.hpp:583
-
unsigned get_k() const
Definition bloom_filter.hpp:598
-
void insert(const uint64_t *hashes)
Definition bloom_filter.hpp:464
-
const std::vector< std::string > & get_seeds() const
Definition bloom_filter.hpp:600
-
const std::string & get_hash_fn() const
Definition bloom_filter.hpp:615
-
std::vector< std::vector< unsigned > > contains(const char *seq, size_t seq_len) const
-
SeedBloomFilter()
Definition bloom_filter.hpp:415
-
unsigned get_hash_num() const
Definition bloom_filter.hpp:613
-
std::vector< std::vector< unsigned > > contains(const std::string &seq) const
Definition bloom_filter.hpp:498
-
void insert(const std::string &seq)
Definition bloom_filter.hpp:456
-
bool contains_insert(const uint64_t *hashes)
Definition bloom_filter.hpp:564
-
+
1 #ifndef BTLLIB_BLOOM_FILTER_HPP
+
2 #define BTLLIB_BLOOM_FILTER_HPP
+
+
4 #include "btllib/nthash.hpp"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
23 static const uint8_t BIT_MASKS[CHAR_BIT] = {
+
+
25 0x01, 0x02, 0x04, 0x08,
+
26 0x10, 0x20, 0x40, 0x80
+
+
+
29 static const char*
const BLOOM_FILTER_SIGNATURE =
"[BTLBloomFilter_v6]";
+
30 static const char*
const KMER_BLOOM_FILTER_SIGNATURE =
+
31 "[BTLKmerBloomFilter_v6]";
+
32 static const char*
const SEED_BLOOM_FILTER_SIGNATURE =
+
33 "[BTLSeedBloomFilter_v6]";
+
34 static const char*
const HASH_FN = NTHASH_FN_NAME;
+
+
36 static const unsigned MAX_HASH_VALUES = 1024;
+
37 static const unsigned PLACEHOLDER_NEWLINES = 50;
+
+
40 class BloomFilterInitializer
+
+
+
+
44 BloomFilterInitializer(
const std::string& path,
const std::string& signature)
+
+
+
47 , table(parse_header(signature))
+
+
+
+
51 static bool check_file_signature(std::ifstream& ifs,
+
52 const std::string& expected_signature,
+
53 std::string& file_signature);
+
+
+
+
57 std::shared_ptr<cpptoml::table> table;
+
+
59 BloomFilterInitializer(
const BloomFilterInitializer&) =
delete;
+
60 BloomFilterInitializer(BloomFilterInitializer&&) =
default;
+
+
62 BloomFilterInitializer& operator=(
const BloomFilterInitializer&) =
delete;
+
63 BloomFilterInitializer& operator=(BloomFilterInitializer&&) =
default;
+
+
+
68 std::shared_ptr<cpptoml::table> parse_header(
const std::string& signature);
+
+
+
+
+
+
+
+
+
86 BloomFilter(
size_t bytes,
unsigned hash_num, std::string hash_fn =
"");
+
+
+
+
+
+
+
+
+
+
107 void insert(
const uint64_t* hashes);
+
+
114 void insert(
const std::vector<uint64_t>& hashes) {
insert(hashes.data()); }
+
+
124 bool contains(
const uint64_t* hashes)
const;
+
+
133 bool contains(
const std::vector<uint64_t>& hashes)
const
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
178 void save(
const std::string& path);
+
+
180 static void save(
const std::string& path,
+
181 const cpptoml::table& table,
+
+
+
+
+
+
192 return check_file_signature(path, BLOOM_FILTER_SIGNATURE);
+
+
+
195 static bool check_file_signature(
const std::string& path,
+
196 const std::string& signature);
+
+
+
199 BloomFilter(
const std::shared_ptr<BloomFilterInitializer>& bfi);
+
+
+
+
+
+
+
+
207 size_t array_bits = 0;
+
208 unsigned hash_num = 0;
+
+
210 std::unique_ptr<std::atomic<uint8_t>[]> array;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
251 void insert(
const char* seq,
size_t seq_len);
+
+
258 void insert(
const std::string& seq) {
insert(seq.c_str(), seq.size()); }
+
+
+
+
273 void insert(
const std::vector<uint64_t>& hashes)
+
+
275 bloom_filter.
insert(hashes);
+
+
+
286 unsigned contains(
const char* seq,
size_t seq_len)
const;
+
+
+
+
297 return contains(seq.c_str(), seq.size());
+
+
+
+
+
308 return bloom_filter.
contains(hashes);
+
+
+
316 bool contains(
const std::vector<uint64_t>& hashes)
const
+
+
318 return bloom_filter.
contains(hashes);
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
379 unsigned get_k()
const {
return k; }
+
+
+
+
390 void save(
const std::string& path);
+
+
+
+
399 return btllib::BloomFilter::check_file_signature(
+
400 path, KMER_BLOOM_FILTER_SIGNATURE);
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
432 const std::vector<std::string>& seeds,
+
433 unsigned hash_num_per_seed);
+
+
+
+
+
+
+
+
+
+
454 void insert(
const char* seq,
size_t seq_len);
+
+
461 void insert(
const std::string& seq) {
insert(seq.c_str(), seq.size()); }
+
+
469 void insert(
const uint64_t* hashes) { kmer_bloom_filter.
insert(hashes); }
+
+
476 void insert(
const std::vector<uint64_t>& hashes)
+
+
478 kmer_bloom_filter.
insert(hashes);
+
+
+
491 std::vector<std::vector<unsigned>>
contains(
const char* seq,
+
492 size_t seq_len)
const;
+
+
503 std::vector<std::vector<unsigned>>
contains(
const std::string& seq)
const
+
+
505 return contains(seq.c_str(), seq.size());
+
+
+
+
+
517 return kmer_bloom_filter.
contains(hashes);
+
+
+
526 bool contains(
const std::vector<uint64_t>& hashes)
const
+
+
528 return kmer_bloom_filter.
contains(hashes);
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
605 const std::vector<std::string>&
get_seeds()
const {
return seeds; }
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
633 void save(
const std::string& path);
+
+
+
+
642 return btllib::BloomFilter::check_file_signature(
+
643 path, SEED_BLOOM_FILTER_SIGNATURE);
+
+
+
+
+
+
649 std::vector<std::string> seeds;
+
650 std::vector<btllib::hashing_internals::SpacedSeed> parsed_seeds;
+
+
+
+
+
+
+
uint64_t get_pop_cnt() const
Definition: bloom_filter.hpp:590
+
unsigned contains_insert(const std::string &seq)
Definition: bloom_filter.hpp:338
+
Definition: bloom_filter.hpp:415
+
unsigned get_hash_num() const
Definition: bloom_filter.hpp:375
+
bool contains_insert(const uint64_t *hashes)
Definition: bloom_filter.hpp:569
+
double get_occupancy() const
+
std::vector< std::vector< unsigned > > contains(const std::string &seq) const
Definition: bloom_filter.hpp:503
+
void insert(const uint64_t *hashes)
Definition: bloom_filter.hpp:469
+
bool contains_insert(const std::vector< uint64_t > &hashes)
Definition: bloom_filter.hpp:155
+
size_t get_bytes() const
Definition: bloom_filter.hpp:161
+
uint64_t get_pop_cnt() const
+
void insert(const char *seq, size_t seq_len)
+
void insert(const uint64_t *hashes)
+
bool contains_insert(const std::vector< uint64_t > &hashes)
Definition: bloom_filter.hpp:582
+
void insert(const char *seq, size_t seq_len)
+
double get_occupancy() const
Definition: bloom_filter.hpp:373
+
BloomFilter & get_bloom_filter()
Definition: bloom_filter.hpp:383
+
BloomFilter()
Definition: bloom_filter.hpp:77
+
unsigned contains(const std::string &seq) const
Definition: bloom_filter.hpp:295
+
static bool is_bloom_file(const std::string &path)
Definition: bloom_filter.hpp:397
+
const std::vector< btllib::hashing_internals::SpacedSeed > & get_parsed_seeds() const
Definition: bloom_filter.hpp:608
+
static bool is_bloom_file(const std::string &path)
Definition: bloom_filter.hpp:640
+
bool contains(const std::vector< uint64_t > &hashes) const
Definition: bloom_filter.hpp:526
+
bool contains(const uint64_t *hashes) const
Definition: bloom_filter.hpp:515
+
unsigned get_hash_num() const
Definition: bloom_filter.hpp:619
+
void insert(const uint64_t *hashes)
Definition: bloom_filter.hpp:266
+
size_t get_bytes() const
Definition: bloom_filter.hpp:369
+
bool contains_insert(const uint64_t *hashes)
+
uint64_t get_pop_cnt() const
Definition: bloom_filter.hpp:371
+
std::vector< std::vector< unsigned > > contains(const char *seq, size_t seq_len) const
+
void insert(const std::string &seq)
Definition: bloom_filter.hpp:461
+
+
unsigned get_total_hash_num() const
Definition: bloom_filter.hpp:595
+
KmerBloomFilter & get_kmer_bloom_filter()
Definition: bloom_filter.hpp:626
+
void insert(const std::vector< uint64_t > &hashes)
Definition: bloom_filter.hpp:273
+
bool contains(const uint64_t *hashes) const
+
void save(const std::string &path)
+
std::vector< std::vector< unsigned > > contains_insert(const char *seq, size_t seq_len)
+
const std::string & get_hash_fn() const
Definition: bloom_filter.hpp:171
+
unsigned get_hash_num() const
Definition: bloom_filter.hpp:167
+
Definition: bloom_filter.hpp:216
+
Definition: bloom_filter.hpp:72
+
double get_fpr() const
Definition: bloom_filter.hpp:377
+
size_t get_bytes() const
Definition: bloom_filter.hpp:588
+
void insert(const std::string &seq)
Definition: bloom_filter.hpp:258
+
unsigned get_k() const
Definition: bloom_filter.hpp:379
+
void insert(const std::vector< uint64_t > &hashes)
Definition: bloom_filter.hpp:114
+
unsigned get_k() const
Definition: bloom_filter.hpp:603
+
bool contains(const std::vector< uint64_t > &hashes) const
Definition: bloom_filter.hpp:316
+
const std::string & get_hash_fn() const
Definition: bloom_filter.hpp:621
+
bool contains_insert(const std::vector< uint64_t > &hashes)
Definition: bloom_filter.hpp:363
+
void insert(const std::vector< uint64_t > &hashes)
Definition: bloom_filter.hpp:476
+
void save(const std::string &path)
+
bool contains(const uint64_t *hashes) const
Definition: bloom_filter.hpp:306
+
+
bool contains(const std::vector< uint64_t > &hashes) const
Definition: bloom_filter.hpp:133
+
const std::vector< std::string > & get_seeds() const
Definition: bloom_filter.hpp:605
+
std::vector< std::vector< unsigned > > contains_insert(const std::string &seq)
Definition: bloom_filter.hpp:555
+
double get_occupancy() const
Definition: bloom_filter.hpp:592
+
void save(const std::string &path)
+
const std::string & get_hash_fn() const
Definition: bloom_filter.hpp:381
+
SeedBloomFilter()
Definition: bloom_filter.hpp:420
+
unsigned get_hash_num_per_seed() const
Definition: bloom_filter.hpp:614
+
static bool is_bloom_file(const std::string &path)
Definition: bloom_filter.hpp:190
+
KmerBloomFilter()
Definition: bloom_filter.hpp:221
+
unsigned contains_insert(const char *seq, size_t seq_len)
+
bool contains_insert(const uint64_t *hashes)
Definition: bloom_filter.hpp:351
+
unsigned contains(const char *seq, size_t seq_len) const
diff --git a/docs/classbtllib_1_1AAHash-members.html b/docs/classbtllib_1_1AAHash-members.html
index b60df369..9117cb30 100644
--- a/docs/classbtllib_1_1AAHash-members.html
+++ b/docs/classbtllib_1_1AAHash-members.html
@@ -1,17 +1,18 @@
-
-
+
+