forked from anotherLostKitten/paJAMeterServer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
binary_classifier.c
136 lines (104 loc) · 4.25 KB
/
binary_classifier.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
// Imports
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <float.h>
#include "mnist_constants.h"
double calculate_L2_norm(double* vector, int size) {
// Init the double sum
double sum_of_squares = 0.0;
// Sum the squares
for (int i = 0; i < size; i++) {
sum_of_squares += vector[i] * vector[i];
}
// Calculate the square root
return sqrt(sum_of_squares);
}
// Function to generate random weight between -0.5 and .5
double generate_random_weight() {
return (((double) rand()) / ((double) RAND_MAX)) -.5 ;
}
// Dot product function for vectors
double dot_product(double* a, double* b, int size) {
double result = 0.;
for (int i = 0; i < size; i++) {
result += a[i] * b[i];
}
return result;
}
void gradient(double* gradient_output, double* x, double* w, int label) {
// Note n is FEATURE_SIZE_W_BIAS since that is the feature size of our image
// y is 1x1 value -- either true or false
// x is (D+1)x1 array -- the pixels of the image as an array - D is number of features - +1 because of bias
// w is (D+1)x1 array -- we automatically assume the transpose
// y_hat is by definition wT*x = y_hat
// Since our convention has w as Dx1 we can just use dot product
// The bias is a 1x1 value
double y_hat[10], sum = 0;
for (int n = 0; n < 10; n++)
sum += y_hat[n] = exp(dot_product(x, &w[n * FEATURE_SIZE_W_BIAS], FEATURE_SIZE_W_BIAS));
for (int n = 0; n < 10; n++)
y_hat[n] = y_hat[n] / sum - (label == n ? 1. : 0.);
// Calculate the gradient vector -- this is a (D+1)x1 sized vector
for (int i = 0; i < FEATURE_SIZE_W_BIAS; i++)
for (int n = 0; n < 10; n++)
gradient_output[i + FEATURE_SIZE_W_BIAS * n] = x[i] * y_hat[n];
}
// Function used to add a new column to the features matrix - a bias column
void add_bias_column(uint8_t* x, double* x_with_bias) {
for (int i = 0; i < FEATURE_SIZE_W_BIAS; i++) {
if (i == 0)
x_with_bias[i] = 1.;
else
x_with_bias[i] = (double)x[i] / 255.;
}
}
// Initialize weights needed for gradient descent - add an extra row for bias
void init_weights(double* w){
for (int i = 0; i < FEATURE_SIZE_W_BIAS; i++)
w[i] = generate_random_weight();
}
double gradient_output[FEATURE_SIZE_W_BIAS * 10];
// Run Gradient Descent
void run_gradient_descent(double learning_rate, struct image_data* datum, double* w){
// Step 1: Add a bias column to the feature vector - ONLY USE x_with_bias from here
double x[FEATURE_SIZE_W_BIAS];
add_bias_column(datum->data, x);
// Calculate the gradient
gradient(gradient_output, x, w, datum->label);
// Perform gradient descent -- aka update the weights based on the gradient
for (int i = 0; i < FEATURE_SIZE_W_BIAS * 10; i++)
w[i] -= learning_rate * gradient_output[i];
// // Step 3: Calculate the l2 norm in order to get the model error
// double error = calculate_L2_norm(gradient_output, FEATURE_SIZE_W_BIAS * 10);
// // Step 4: Print out the error
// printf("The model error is: %lf\n", error);
}
int main() {
struct image_data* data = read_data(MNIST_TRAINING_IMAGES, "training_sets/train-images.idx3-ubyte", "training_sets/train-labels.idx1-ubyte");
double w[FEATURE_SIZE_W_BIAS * 10];
for (int i=0; i < FEATURE_SIZE_W_BIAS * 10; i++)
w[i] = generate_random_weight();
for (int i=0; i < MNIST_TRAINING_IMAGES; i++)
run_gradient_descent(LEARNING_RATE, &data[i], w);
free(data);
data = read_data(MNIST_TESTING_IMAGES, "training_sets/t10k-images.idx3-ubyte", "training_sets/t10k-labels.idx1-ubyte");
int success = 0;
for (int img = 0; img < MNIST_TESTING_IMAGES; img++) {
double maxc = 0;
int maxn = -1;
for (int n = 0; n < 10; n++) {
double conf = w[785 * n + 784];
for (int i = 0; i < 784; i++)
conf += w[785 * n + i] * (double)data[img].data[i] / 255.0;
if (conf > maxc) {
maxc = conf;
maxn = n;
}
}
if (maxn == data[img].label)
success++;
}
free(data);
printf("accuracy: %d / %d (%lf%%)\n", success, MNIST_TESTING_IMAGES, (double)success / (double)MNIST_TESTING_IMAGES * 100.0);
}