Skip to content

Commit

Permalink
Optimize Median method from TC O(N log N) to Linear Time Complexity (#23
Browse files Browse the repository at this point in the history
)

* refactor[DataTomeAnalysis]!: Optimize the median method from O(N log N) to O(N) time complexity using an introselect algorithm
median() now uses an introselect algorithm that combines quickselect with median-of-medians pivot selection
feat[DataTomeUtils]: Add the dt_min helper to get the minimum of two values, and the swap helper to exchange two values
resolves (#18)

* fix[DataTomeAnalysis]: renaming min to dt_min to use utility minimum function instead of cpp std lib

* fix:[DataTomeUtils]: Refactoring dt_min utility function
  • Loading branch information
mohammedelgammal authored Dec 16, 2024
1 parent d90b8dd commit 3f601cb
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 20 deletions.
88 changes: 69 additions & 19 deletions src/DataTomeAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,26 +35,26 @@ class DataTomeAnalysis : public DataTomeMvAvg<TypeOfArray, TypeOfSum> {
return result;
}

TypeOfArray median() {
TypeOfArray median = 0;
size_t current_size = this->point_count();

TypeOfArray *temp =
(typeof(temp))malloc(current_size * sizeof(typeof(temp)));

memcpy(temp, this->_array, current_size * sizeof(TypeOfArray));

qsort(temp, current_size, sizeof(TypeOfArray), sort_ascend<TypeOfArray>);

if (current_size % 2 == 0) {
median = (temp[current_size / 2 - 1] + temp[current_size / 2]) / 2;
} else {
median = temp[current_size / 2];
double median() {
double calculated_median;
size_t current_size = this->point_count(),
m = current_size / 2;
TypeOfArray *temp_array = (TypeOfArray *)malloc(sizeof(TypeOfArray) * current_size);

memcpy(temp_array, this->_array, sizeof(TypeOfArray) * current_size);

if (current_size % 2 == 0)
{
size_t m1 = quickSelect(0, current_size - 1, m - 1, temp_array),
m2 = quickSelect(0, current_size - 1, m, temp_array);
calculated_median = (m1 + m2) / 2.0;
}

free(temp);

return median;
else
{
calculated_median = quickSelect(0, current_size - 1, m, temp_array);
}
free(temp_array);
return calculated_median;
}

TypeOfArray lowest_mode() {
Expand Down Expand Up @@ -275,6 +275,56 @@ class DataTomeAnalysis : public DataTomeMvAvg<TypeOfArray, TypeOfSum> {
return sqrt(partial_var(partial_id) /
this->partial_point_count(partial_id));
}

private:
/**
 * Picks a pivot value from nums[l..r] (both bounds inclusive).
 *
 * The range is cut into groups of at most five elements. Each group is
 * sorted and its middle element is moved to the front of the range, so the
 * group medians end up packed in nums[l .. l + groups). That packed prefix
 * is then sorted and its middle element is returned. Gathering the medians
 * in place avoids a variable-length stack array.
 *
 * Side effect: the elements of nums[l..r] are reordered (the range remains
 * a permutation of its original contents).
 *
 * @param l    index of the first element of the range
 * @param r    index of the last element of the range
 * @param nums array holding the range
 * @return The middle of the sorted group medians; a value-initialised
 *         TypeOfArray when the range is empty (r < l).
 */
TypeOfArray medianOfMedians(int l, int r, TypeOfArray nums[]) {
  if (r < l) {
    return TypeOfArray();
  }

  const int k = 5;  // group width
  const int size = r - l + 1;
  const int medians_size = (size + k - 1) / k;

  for (int i = l, m_count = 0; i <= r; i += k, m_count++) {
    const int left = i;
    const int right = dt_min(i + k, r + 1);  // one past the group's end
    const int mid = left + (right - left) / 2;
    qsort(nums + left, right - left, sizeof(TypeOfArray),
          sort_ascend<TypeOfArray>);
    // l + m_count never exceeds left, so this slot is outside every group
    // that has not been processed yet and holds no earlier median.
    swap(nums[l + m_count], nums[mid]);
  }

  qsort(nums + l, medians_size, sizeof(TypeOfArray),
        sort_ascend<TypeOfArray>);
  return nums[l + medians_size / 2];
}

/**
 * Returns the value that would sit at index m if nums[l..r] were sorted in
 * ascending order (both bounds inclusive, m is an absolute index in [l, r]).
 *
 * Each round asks medianOfMedians for a pivot, partitions the current range
 * around it, and then narrows the range to the side that contains m. The
 * narrowing is done in a loop, so stack usage does not grow with the input.
 *
 * Side effect: nums[l..r] is reordered.
 *
 * @param l    index of the first element of the range
 * @param r    index of the last element of the range
 * @param m    absolute index of the order statistic wanted
 * @param nums array holding the range
 * @return The selected element; a value-initialised TypeOfArray when the
 *         range is empty (r < l).
 */
TypeOfArray quickSelect(int l, int r, int m, TypeOfArray nums[]) {
  while (l <= r) {
    // Keep the pivot in the element type so the equality search below can
    // actually match fractional or wide values.
    const TypeOfArray pivot = medianOfMedians(l, r, nums);

    // Park one occurrence of the pivot at the right end of the range.
    for (int i = l; i <= r; i++) {
      if (nums[i] == pivot) {
        swap(nums[i], nums[r]);
        break;
      }
    }

    // Move everything smaller than the pivot to the front of the range.
    int p = l;
    for (int i = l; i < r; i++) {
      if (nums[i] < nums[r]) {
        swap(nums[i], nums[p]);
        p += 1;
      }
    }
    swap(nums[p], nums[r]);  // pivot lands at its final sorted position

    if (p > m) {
      r = p - 1;
    } else if (p < m) {
      l = p + 1;
    } else {
      return nums[p];
    }
  }

  // Only reached when called with an empty range.
  return TypeOfArray();
}
};

#endif // DATA_TOME_ANALYSIS_H
14 changes: 13 additions & 1 deletion src/DataTomeUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,16 @@ int sort_ascend(const void *cmp1, const void *cmp2) {
return (int)a - b;
}

#endif // DATA_TOME_UTILS_H
/**
 * Exchanges the values of two objects of the same type.
 *
 * @param a first object; holds the former value of b afterwards
 * @param b second object; holds the former value of a afterwards
 */
template <typename TypeOfArray>
void swap(TypeOfArray &a, TypeOfArray &b)
{
  TypeOfArray previous_a = a;  // keep a's value while it is overwritten
  a = b;
  b = previous_a;
}

/**
 * Returns a reference to the smaller of two values, compared with operator<.
 * When neither value is smaller than the other, the second argument is
 * returned.
 *
 * @param a first candidate
 * @param b second candidate
 * @return reference to a if a < b, otherwise reference to b
 */
template <typename T>
const T &dt_min(const T &a, const T &b)
{
  if (a < b)
  {
    return a;
  }
  return b;
}

#endif // DATA_TOME_UTILS_H

0 comments on commit 3f601cb

Please sign in to comment.