-
Notifications
You must be signed in to change notification settings - Fork 0
/
ft_linear_regression.py
105 lines (79 loc) · 2.93 KB
/
ft_linear_regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import numpy as np
class ft_linear_regression():
    """Linear regression model trained with batch gradient descent.

    The parameters are stored in ``self.thetas`` as a column vector of
    shape ``(n_features + 1, 1)``; the first entry is the intercept.

    Data-taking methods return ``None`` (instead of raising) when an
    argument is not array-like, is empty, or when ``x`` and ``y`` do not
    have the same number of rows.
    """

    def __init__(self, thetas, alpha=0.001, n_cycle=1000):
        """Store the hyper-parameters and the initial parameter vector.

        Args:
            thetas: initial parameters, a ``list`` or ``numpy.ndarray``.
            alpha: default learning rate used by ``fit_``.
            n_cycle: default maximum number of gradient-descent steps.

        Raises:
            TypeError: if ``thetas`` is neither a list nor an ndarray.
        """
        self.alpha = alpha
        self.n_cycle = int(n_cycle)
        self.thetas = self.__parseThetas(thetas)

    def __parseThetas(self, thetas):
        """Return ``thetas`` as a column vector, or raise ``TypeError``."""
        if isinstance(thetas, list):
            return np.asarray(thetas, dtype=np.float32).reshape(len(thetas), 1)
        # A 0-d array has no first axis to reshape on, so reject it here.
        if isinstance(thetas, np.ndarray) and thetas.ndim >= 1:
            return thetas.reshape(thetas.shape[0], 1)
        raise TypeError("thetas has to be -list- or -numpy.ndarray- and contain 2 elements")

    def __isEmpty(self, *arg):
        """Return True if any argument lacks a shape or has no rows."""
        for data in arg:
            shape = getattr(data, 'shape', None)
            # ``not shape`` covers both a missing attribute and a 0-d array.
            if not shape or shape[0] == 0:
                return True
        return False

    def __dimensionsMatch(self, data0, data1):
        """Return True if both arrays have the same number of rows."""
        return data0.shape[0] == data1.shape[0]

    def __asColumn(self, data):
        """Promote a 1-D array to a column; leave other ranks untouched."""
        if data.ndim == 1:
            return data.reshape(-1, 1)
        return data

    def __addIntercept(self, data):
        """Prepend a column of ones to ``data`` (1-D input becomes a column)."""
        data = self.__asColumn(data)
        ones = np.ones((data.shape[0], 1))
        return np.concatenate((ones, data), axis=1)

    def __parseData(self, data):
        """Reshape ``data`` into a single column with ``len(data)`` rows."""
        return data.reshape(len(data), 1)

    def __zscore(self, data):
        """Standardize ``data`` column-wise (zero mean, unit deviation).

        A column with zero standard deviation yields inf/nan values,
        because the scaling factor is ``1 / std``.
        """
        std = np.std(data, axis=0)
        mean = np.mean(data, axis=0)
        return (data - mean) * (1 / std)

    def __squaredErrors(self, x, y):
        """Return per-sample squared errors, or None on invalid input.

        ``y`` is promoted to a column first: predictions have shape
        ``(m, 1)``, and subtracting a 1-D ``(m,)`` array from them would
        broadcast to an ``(m, m)`` matrix instead of ``m`` residuals.
        """
        if self.__isEmpty(x, y):
            return None
        if not self.__dimensionsMatch(x, y):
            return None
        predictions = self.predict_(x)
        return np.power(predictions - self.__asColumn(y), 2)

    def predict_(self, x):
        """Return the predictions for ``x`` as an ``(m, 1)`` array.

        Returns ``None`` if ``x`` is not array-like or has no rows.
        """
        if self.__isEmpty(x):
            return None
        return self.__addIntercept(x).dot(self.thetas)

    def cost_elem_(self, x, y):
        """Return each sample's contribution to the half-MSE cost.

        Every element is ``(prediction - y) ** 2 / (2 * m)``. Returns
        ``None`` on empty input or mismatched row counts.
        """
        squared = self.__squaredErrors(x, y)
        if squared is None:
            return None
        return squared / (2 * squared.shape[0])

    def cost_(self, x, y):
        """Return the half-MSE cost as a scalar, or None on invalid input."""
        result = self.cost_elem_(x, y)
        if result is None:
            return None
        return np.sum(result)

    def fit_(self, x, y, *arg, **kwargs):
        """Fit the parameters to ``(x, y)`` by batch gradient descent.

        Keyword Args:
            alpha: learning rate for this call (defaults to ``self.alpha``).
            n_cycle: maximum number of steps (defaults to ``self.n_cycle``).

        Returns:
            The updated ``self.thetas``, or ``None`` on empty input or
            mismatched row counts. Iteration stops early once a step
            leaves the parameters exactly unchanged.
        """
        if self.__isEmpty(x, y):
            return None
        if not self.__dimensionsMatch(x, y):
            return None
        alpha = kwargs.get('alpha', self.alpha)
        n_cycle = int(kwargs.get('n_cycle', self.n_cycle))
        x = self.__addIntercept(x)
        y = self.__parseData(y)
        # Loop invariants: step size and the transposed design matrix.
        step = (1 / x.shape[0]) * alpha
        x_t = x.transpose()
        for _ in range(n_cycle):
            # The update below rebinds self.thetas to a new array, so this
            # reference keeps pointing at the previous values.
            previous = self.thetas
            self.thetas = previous - step * x_t.dot(x.dot(previous) - y)
            if np.array_equal(self.thetas, previous):
                break
        return self.thetas

    def mse_(self, x, y):
        """Return the mean squared error as a scalar, or None on invalid input."""
        squared = self.__squaredErrors(x, y)
        if squared is None:
            return None
        return np.sum(squared / squared.shape[0])