-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathregressionclassifier.py
69 lines (53 loc) · 2.42 KB
/
regressionclassifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import numpy as np
from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeRegressor
# Feature extraction: adjacent-band colour differences and redshift targets.
def get_features_targets(data):
    """Build the colour-difference feature matrix and the redshift targets.

    Args:
        data: Record array exposing a ``shape`` attribute and the named
            fields ``'u'``, ``'g'``, ``'r'``, ``'i'``, ``'z'`` and
            ``'redshift'``.

    Returns:
        tuple: ``(features, targets)`` where ``features`` is a float array of
        shape ``(data.shape[0], 4)`` whose columns are ``u - g``, ``g - r``,
        ``r - i`` and ``i - z``, and ``targets`` is ``data['redshift']``.
    """
    bands = ('u', 'g', 'r', 'i', 'z')
    # One column per pair of neighbouring bands.
    features = np.zeros((data.shape[0], len(bands) - 1))
    for column, (first, second) in enumerate(zip(bands[:-1], bands[1:])):
        features[:, column] = data[first] - data[second]
    return features, data['redshift']
# Accuracy metric: median absolute difference between predictions and truth.
def median_diff(predicted, actual):
    """Return the median of the absolute differences ``|predicted - actual|``.

    Args:
        predicted: Array of predicted values.
        actual: Array of reference values, compatible with ``predicted``
            under subtraction.

    Returns:
        The median absolute difference as computed by ``np.median``.
    """
    residuals = predicted - actual
    return np.median(np.absolute(residuals))
# k-fold cross-validation returning one median difference per fold.
def cross_validate_model(model, features, targets, k, random_state=None):
    """Run shuffled k-fold cross-validation and collect per-fold median errors.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            refit on every fold, so it is left fitted on the last training
            split when the function returns.
        features: Feature array, indexable by an integer index array.
        targets: Target array aligned with ``features`` and indexable the
            same way.
        k: Number of folds passed to ``KFold`` as ``n_splits``.
        random_state: Optional seed forwarded to ``KFold`` so the shuffled
            split can be reproduced between runs. The default ``None``
            leaves the shuffle unseeded, as before.

    Returns:
        list: One ``median_diff`` value per fold, in the order the folds are
        produced by ``KFold.split``.
    """
    kf = KFold(n_splits=k, shuffle=True, random_state=random_state)

    # Collect the median difference measured on each held-out fold.
    diffs = []
    for train_indices, test_indices in kf.split(features):
        train_features, test_features = features[train_indices], features[test_indices]
        train_targets, test_targets = targets[train_indices], targets[test_indices]

        # Fit on the training part of this fold only.
        model.fit(train_features, train_targets)

        # Score the fitted model on the held-out part.
        predictions = model.predict(test_features)
        diffs.append(median_diff(predictions, test_targets))

    return diffs
# Separate the catalogue by spectral class.
def split_galaxies_qsos(data):
    """Split a record array into its galaxy rows and its QSO rows.

    Args:
        data: Record array with a ``'spec_class'`` field holding byte-string
            labels.

    Returns:
        tuple: ``(galaxies, qsos)`` - the rows whose ``'spec_class'`` equals
        ``b'GALAXY'`` and the rows whose ``'spec_class'`` equals ``b'QSO'``.
        Rows with any other label appear in neither array.
    """
    spec_class = data['spec_class']
    # Boolean masks selecting each spectral class.
    is_galaxy = spec_class == b'GALAXY'
    is_qso = spec_class == b'QSO'
    return data[is_galaxy], data[is_qso]
def cross_validate_median_diff(data, max_depth=19, k=10):
    """Return the mean cross-validated median difference of a decision tree.

    Features and targets are extracted with ``get_features_targets``, a
    ``DecisionTreeRegressor`` is cross-validated with
    ``cross_validate_model``, and the per-fold median differences are
    averaged.

    Args:
        data: Record array accepted by ``get_features_targets``.
        max_depth: Maximum depth of the decision tree. Defaults to 19, the
            value previously hard-coded here.
        k: Number of cross-validation folds. Defaults to 10, the value
            previously hard-coded here.

    Returns:
        The mean of the per-fold median differences, as computed by
        ``np.mean``.
    """
    features, targets = get_features_targets(data)
    dtr = DecisionTreeRegressor(max_depth=max_depth)
    fold_diffs = cross_validate_model(dtr, features, targets, k)
    return np.mean(fold_diffs)
if __name__ == "__main__":
    # Load the catalogue and separate it into galaxies and QSOs.
    catalogue = np.load('./sdss_galaxy_colors.npy')
    galaxy_sample, qso_sample = split_galaxies_qsos(catalogue)

    # Cross-validate each sample with cross_validate_median_diff (defined
    # above) to get its mean median difference.
    galaxy_score = cross_validate_median_diff(galaxy_sample)
    qso_score = cross_validate_median_diff(qso_sample)

    # Report both results to three decimal places.
    print("Median difference for Galaxies: {:.3f}".format(galaxy_score))
    print("Median difference for QSOs: {:.3f}".format(qso_score))