-
Notifications
You must be signed in to change notification settings - Fork 19
/
predictor.py
86 lines (72 loc) · 2.79 KB
/
predictor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import numpy as np
import scipy
from scipy.optimize import nnls
import csv
import sys
class Predictor(object):
    '''
    Predict running time from the number of machines and the input fraction.

    Training points are [mcs, input_fraction, time] triples. fit() solves a
    non-negative least squares problem over the features produced by
    _get_features, and predict() / predict_all() apply the fitted
    coefficients to new configurations.
    '''

    def __init__(self, training_data_in=None, data_file=None):
        '''
        Initialize the Predictor with some training data.

        training_data_in -- optional iterable of [mcs, input_fraction, time]
            points. The points are copied into a list owned by this instance;
            the caller's container is never modified.
        data_file -- optional path of a text file holding one
            "mcs,input_fraction,time" record per line. Blank lines and lines
            starting with '#' are skipped.
        '''
        self.training_data = []
        # None (rather than a shared [] default) avoids the mutable default
        # argument pitfall.
        if training_data_in is not None:
            self.training_data.extend(training_data_in)
        if data_file:
            self._load_data_file(data_file)

    def _load_data_file(self, data_file):
        '''
        Append every "mcs,input_fraction,time" record of data_file to the
        training data.
        '''
        # Text mode: csv.reader requires str rows on Python 3 and still works
        # on Python 2.
        with open(data_file, 'r') as csvfile:
            reader = csv.reader(csvfile, delimiter=' ')
            for row in reader:
                # Blank lines give an empty row and leading spaces give empty
                # leading fields; drop those so they cannot raise IndexError.
                fields = [field for field in row if field]
                if not fields or fields[0].startswith('#'):
                    continue
                # Only the first space-delimited token carries the record.
                parts = fields[0].split(',')
                mc = int(parts[0])
                scale = float(parts[1])
                time = float(parts[2])
                self.training_data.append([mc, scale, time])

    def add(self, mcs, input_fraction, time):
        '''
        Append one training point [mcs, input_fraction, time].
        '''
        self.training_data.append([mcs, input_fraction, time])

    def _coefficients(self):
        '''
        Return the fitted coefficient vector.

        Raises AttributeError (the same type an unfitted instance raised
        before, now with an explicit message) when fit() has not been called.
        '''
        model = getattr(self, 'model', None)
        if model is None:
            raise AttributeError(
                "Predictor has no fitted model; call fit() first")
        return model[0]

    def predict(self, input_fraction, mcs):
        '''
        Predict running time for a single configuration.

        NOTE: the two arguments are forwarded to _get_features in the order
        given, and _get_features reads the first value as the number of
        machines and the second as the input fraction (scale). Positional
        callers (fit() included) therefore pass (machines, input_fraction),
        the opposite of what the parameter names suggest. The names are kept
        unchanged for backward compatibility.
        '''
        test_features = np.array(self._get_features([input_fraction, mcs]))
        return test_features.dot(self._coefficients())

    def predict_all(self, test_data):
        '''
        Predict running time for a batch of configurations.

        test_data should be a list whose elements are indexable pairs. As
        with predict(), element [0] is used as the number of machines and
        element [1] as the input fraction. Returns one prediction per element.
        '''
        test_features = np.array(
            [self._get_features([row[0], row[1]]) for row in test_data])
        return test_features.dot(self._coefficients())

    def fit(self):
        '''
        Fit the non-negative least squares model to the training data.

        Stores the (coefficients, residual norm) result of nnls in self.model,
        prints the ratio predicted / measured for every training point, and
        returns the coefficient vector.

        Raises ValueError when there is no training data.
        '''
        print("Fitting a model with  %d  points" % len(self.training_data))
        if not self.training_data:
            raise ValueError("Cannot fit a model without training data")

        labels = np.array([row[2] for row in self.training_data])
        data_points = np.array(
            [self._get_features(row) for row in self.training_data])
        self.model = nnls(data_points, labels)
        # TODO: Add a debug logging mode ?

        # Training error: reuse the feature matrix built above instead of
        # recomputing the features point by point.
        predictions = data_points.dot(self.model[0])
        training_errors = [
            str(np.around(predicted / row[2] * 100, 2)) + "%"
            for predicted, row in zip(predictions, self.training_data)
        ]
        print("Prediction ratios are " + ", ".join(training_errors))
        return self.model[0]

    def num_examples(self):
        '''
        Return the number of training points collected so far.
        '''
        return len(self.training_data)

    def _get_features(self, training_point):
        '''
        Build the feature vector for one point.

        training_point[0] is the number of machines and training_point[1] is
        the scale (input fraction); any further elements are ignored.
        Returns [1, scale / machines, machines, log(machines)].
        '''
        mc = training_point[0]
        scale = training_point[1]
        return [1.0, float(scale) / float(mc), float(mc), np.log(mc)]
# Command-line entry point: fit a model on the CSV file named by the single
# argument and print predicted times for 4, 8, ..., 60 machines.
if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage <predictor.py> <csv_file_train>")
        # A usage error is a failure, so report it with a non-zero status.
        sys.exit(1)

    pred = Predictor(data_file=sys.argv[1])
    pred.fit()

    # Each test point is [machines, input_fraction]; the first element is the
    # value printed under the "Machines" header below.
    test_data = [[machines, 1.0] for machines in range(4, 64, 4)]
    predicted_times = pred.predict_all(test_data)

    print("")
    print("Machines, Predicted Time")
    for point, predicted in zip(test_data, predicted_times):
        print("%s %s" % (point[0], predicted))