-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.cpp
106 lines (88 loc) · 3.41 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#include <cassert>
#include <cstddef>
#include <iostream>
#include <random>
#include <tuple>
#include <utility>
#include <vector>

#include "src/random_forest_classifier.h"
void adecvatnost_test() {
std::vector<std::vector<double>> X = {{0, 0},
{0, 1},
{0.5, 0.5},
{1, 0},
{1, 1}};
std::vector<int> y = {0, 1, 1, 1, 0};
decision_tree_classifier clf(2);
clf.fit(X, y);
random_forest_classifier forest(10, 2);
forest.fit(X, y);
auto pred = clf.predict(X);
auto forest_pred = forest.predict(X);
assert(pred == y);
assert(forest_pred == y);
}
/// Tells whether the point (x, y) lies inside or on the unit circle
/// centred at the origin, i.e. whether x^2 + y^2 <= 1.
///
/// @param x  horizontal coordinate of the point
/// @param y  vertical coordinate of the point
/// @return   true when the squared distance to the origin does not exceed 1
bool in_circle(double x, double y) {
    const double squared_distance = x * x + y * y;
    return squared_distance <= 1;
}
/// Generates a random binary-classification data set of 2-D points.
///
/// Each point's coordinates are drawn independently from
/// std::uniform_real_distribution over [x_min, x_max) and [y_min, y_max).
/// The engine is a std::mt19937 seeded from std::random_device, so the
/// points differ from call to call. Every point is then labelled by
/// calling `condition` on its coordinates.
///
/// @param num        number of points to generate (0 yields two empty vectors)
/// @param condition  predicate deciding the class of a point; must not be null
/// @param x_min      lower bound of the x coordinate (must be <= x_max)
/// @param x_max      upper bound of the x coordinate
/// @param y_min      lower bound of the y coordinate (must be <= y_max)
/// @param y_max      upper bound of the y coordinate
/// @return tuple (X, y): X[i] is the pair {x, y} of the i-th point and
///         y[i] is 1 when `condition` holds for that point, 0 otherwise
std::tuple<std::vector<std::vector<double>>, std::vector<int>>
make_binary_points_classification(size_t num, bool (* condition)(double x, double y), int x_min = -2, int x_max = 2,
                                  int y_min = -2,
                                  int y_max = 2) {
    // Violating any of these is a programming error: a null predicate would be
    // dereferenced below, and std::uniform_real_distribution requires a <= b.
    assert(condition != nullptr);
    assert(x_min <= x_max);
    assert(y_min <= y_max);

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> xs(x_min, x_max);
    std::uniform_real_distribution<> ys(y_min, y_max);

    std::vector<std::vector<double>> X(num);
    std::vector<int> y(num);
    for (size_t i = 0; i < num; ++i) {
        // Braced initialisation evaluates left to right: x is drawn before y.
        X[i] = {xs(gen), ys(gen)};
        y[i] = condition(X[i][0], X[i][1]);
    }

    // Move the buffers into the tuple instead of deep-copying both vectors.
    return {std::move(X), std::move(y)};
}
void accuracy_on_training_set_test() {
size_t num_of_points = 100;
auto[X, y] = make_binary_points_classification(num_of_points, in_circle);
decision_tree_classifier clf(2, X, y);
auto pred = clf.predict(X);
random_forest_classifier forest(10, 2);
forest.fit(X, y);
auto forest_pred = forest.predict(X);
size_t correct = 0;
size_t forest_correct = 0;
for (size_t i = 0; i < num_of_points; ++i) {
correct += (pred[i] == y[i]);
forest_correct += (forest_pred[i] == y[i]);
}
double acc = static_cast<double>(correct) / static_cast<double>(num_of_points);
double forest_acc = static_cast<double>(forest_correct) / static_cast<double>(num_of_points);
std::cout << "accuracy of tree on training set = " << acc << '\n';
std::cout << "accuracy of forest on training set = " << forest_acc << '\n' << '\n';
assert(acc >= 0.9);
}
void accuracy_on_real_test() {
size_t num_of_points = 100;
auto[X, y] = make_binary_points_classification(num_of_points, in_circle);
decision_tree_classifier clf(2);
clf.fit(X, y);
random_forest_classifier forest(10, 2);
forest.fit(X, y);
auto[test_X, test_y] = make_binary_points_classification(num_of_points, in_circle);
auto pred = clf.predict(test_X);
auto forest_pred = forest.predict(test_X);
size_t correct = 0;
size_t forest_correct = 0;
for (size_t i = 0; i < num_of_points; ++i) {
correct += (pred[i] == test_y[i]);
forest_correct += (forest_pred[i] == test_y[i]);
}
double acc = static_cast<double>(correct) / static_cast<double>(num_of_points);
double forest_acc = static_cast<double>(forest_correct) / static_cast<double>(num_of_points);
std::cout << "accuracy of tree on real test = " << acc << '\n';
std::cout << "accuracy of forest on real test = " << forest_acc << '\n' << '\n';
assert(acc >= 0.8);
}
/// Program entry point: runs the three checks one after another.
///
/// Each check reports failure through assert, so reaching the end of this
/// function means every assertion that was compiled in has passed.
int main() {
    adecvatnost_test();               // exact fit on a tiny hand-written set
    accuracy_on_training_set_test();  // accuracy on the training points
    accuracy_on_real_test();          // accuracy on freshly generated points
    return 0;
}