Skip to content

Commit

Permalink
Update comments
Browse files Browse the repository at this point in the history
  • Loading branch information
friofry committed Jul 15, 2021
1 parent b48138c commit 3aa15df
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 17 deletions.
6 changes: 3 additions & 3 deletions conversion_lib/hash_conversions_x4.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ uint32_t hash_to_idx_with_check(uint32_t hash, bool &out_ok)

} // namespace

// Преобразование индекса в хэш
// speed up index to hash conversion
vector<uint32_t> calc_hash_x4()
{
uint32_t total_idx_4 = 15 * 15 * 15 * 15;
Expand Down Expand Up @@ -63,13 +63,13 @@ vector<uint32_t> calc_idx_x4()
return result_indexes;
}

// посчитать хэш по индексу
// convert index to hash
/// Convert a full index into its hash using the precalculated table.
///
/// The index is split into a low part (idx % HI_PART_15_MIN) and a high part
/// (idx / HI_PART_15_MIN); each part is looked up in hash_x4 and the two
/// partial hashes are combined, with the high one placed in the upper 16 bits.
///
/// @param idx      index to convert
/// @param hash_x4  lookup table mapping a partial index to its partial hash
/// @return         combined hash for idx
uint32_t idx_to_hash_x4(uint32_t idx, const vector<uint32_t> &hash_x4)
{
const uint32_t low_part_idx = idx % HI_PART_15_MIN;
const uint32_t high_part_idx = idx / HI_PART_15_MIN;

const uint32_t low_part_hash = hash_x4[low_part_idx];
const uint32_t high_part_hash = hash_x4[high_part_idx] << 16;

return low_part_hash + high_part_hash;
}

// посчитать индексу по хэшу
// convert hash to index
uint32_t hash_to_idx_x4(uint32_t hash, const vector<uint32_t> &idx_x4)
{
return idx_x4[hash & 0xFFFF] + (idx_x4[hash >> 16] * HI_PART_15_MIN);
Expand Down
6 changes: 3 additions & 3 deletions stat_lib/external_generator_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ void generate_external_indexes_cpu(double max_motif_prob_simple,
{
result.reserve(TOTAL_MOT);

/// предрассчитать хэши для всех возможны 4-буквенных индексов
// precalculate hashes for all possible 4-letter indexes
auto hash_x4 = calc_hash_x4();

for (uint32_t i = 0; i < TOTAL_MOT; i++) {
Expand Down Expand Up @@ -75,7 +75,7 @@ void generate_hashes_thread(vector<uint32_t> &result_hashes,
}
if (complementary) {
uint32_t compl_hash = to_compl_hash_reverse(hash);
// Мотив уже рассматривался (комплементарный)
// the complementary motif has been already processed
if (compl_hash < hash) {
continue;
}
Expand Down Expand Up @@ -212,7 +212,7 @@ void generate_external_hashes_cpu(double max_motif_prob_by_chance,
t.silence();
int hashes_per_sequence = stat_model.get_avg_hashes_per_sequence();

// Граничное значение вероятности появления мотива в позиции
// Boundary value of the probability of occurrence of the motif in the position
double probability_border = calculate_border_motif_probability(max_motif_prob_by_chance, hashes_per_sequence);
// printf("prob border %f %u: %f\n", max_motif_prob_by_chance, hashes_per_sequence, probability_border);

Expand Down
18 changes: 9 additions & 9 deletions stat_lib/markov_stat_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ vector<uint32_t> calc_iupac_hashes_counts(const unordered_map<uint32_t, int> &nu
return true;
};

// Перебор для каждой позиции всех вариантов iupac кодов.
// Try all iupac code variants for each position
iterate_slots(kmer_length, iupacs_per_nucl, update_all_iupacs);
}
return iupac_hashes_counter;
Expand All @@ -160,12 +160,12 @@ int kmers_in_sequences_by_length(const SequenceNums &sequence_nums, int kmer_len

uint32_t extract_kmer_hash(uint32_t hash, int pos, int length)
{
// Сделать маску
// Prepare mask
uint32_t mask = 0xFFFFFFFF;
int shift = (MOTIV_LEN - length) * 4;
mask >>= shift;

// Выделить хэш и сдвинуть в начало
// Extract the k-mer hash with the mask and shift it down to the lowest bits
int shift_pos = shift - pos * 4;
mask <<= shift_pos;
return (mask & hash) >> shift_pos;
Expand All @@ -188,14 +188,14 @@ MarkovStatModel::MarkovStatModel(const std::vector<std::string> &sequences,
throw invalid_argument("Supported levels for MarkovStatModel [1-4]");
}

// === посчитать вероятности
// === calc probability
uint32_t hashes_count = number_of_hashes(_kmer_length);
_probabilities.resize(hashes_count, _default_prob);

calc_probabilities(_kmer_length);
calc_probabilities(_kmer_length - 1);

// === предарссчитанные данные
// === precalc data
_symbols_in_key1 = (MOTIV_LEN - _kmer_length) / 2 + 1;
_symbols_in_key2 = MOTIV_LEN - _kmer_length - _symbols_in_key1 + 1;
_key_size1 = _symbols_in_key1 + _kmer_length - 1;
Expand Down Expand Up @@ -251,20 +251,20 @@ void MarkovStatModel::precalc_probabilities(int symbols_in_key, std::vector<doub
return true;
};

// Перебор для каждой позиции всех вариантов iupac кодов.
// try all iupac codes for each pos
iterate_slots(key_size, ALPH_SIZE, precalc_key);
}

void MarkovStatModel::calc_probabilities(uint32_t kmer_length)
{
// Встречаемость нуклеотидов k-меров из входных последовательностях
// Occurrence counts of nucleotide k-mers in the input sequences
unordered_map<uint32_t, int> nucl_hash_counters;
increment_nucl_hashes(_sequence_nums, nucl_hash_counters, kmer_length);

// Встречаемость iupac k-меров через встречаемость k-меров нуклеотидных
// Get occurrence of iupac k-mers from nucleotide k-mers occurrences
vector<uint32_t> iupac_hashes_counters = calc_iupac_hashes_counts(nucl_hash_counters, kmer_length);

// Вероятность iupac k-меров
// Probability of iupac k-mers
double total_kmers_count = static_cast<double>(kmers_in_sequences_by_length(_sequence_nums, kmer_length));

for (uint32_t hash = 0; hash < iupac_hashes_counters.size(); hash++) {
Expand Down
2 changes: 1 addition & 1 deletion stat_lib/probability.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ double motif_probability_x4(unsigned int motif_hash, const std::vector<double> &
return prob;
}

/// значимость
/// significance (chi2 criterion)
double importance_chi2(
unsigned int motif_hash,
unsigned int weight,
Expand Down
2 changes: 1 addition & 1 deletion stat_lib/stat_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ int StatModel::get_avg_hashes_per_sequence() const
/// Expected occurrence of the motif in the sequences (count) under the model.
///
/// Evaluates  count * (1 - (1 - p)^n)  where p = motif_probability_x4(hash),
/// n = _hashes_per_sequence and count = _sequence_nums.count.
///
/// @param hash  motif hash
/// @return      expected occurrence count as a double
double StatModel::get_random_weight(uint32_t hash) const
{
const double p = motif_probability_x4(hash);

// log1p(-p) instead of log(1.0 - p): forming 1.0 - p first rounds away the
// low bits of a small p before the logarithm is taken.
const double log_absent_everywhere = _hashes_per_sequence * log1p(-p);

// expm1(x) instead of exp(x) - 1: avoids cancellation when the result is
// close to zero. Written as "0.0 - ..." so that p == 0 still yields +0.0.
const double present_prob = 0.0 - expm1(log_absent_everywhere);

return _sequence_nums.count * present_prob;
}

Expand Down

0 comments on commit 3aa15df

Please sign in to comment.