# train.py (92 lines, 71 loc, 3.06 KB)
# Forked from deepaksamuel/bragg-peak.
#%%
from sklearn.neural_network import MLPRegressor
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
# Folder containing all the depth-dose data files.
directory = "data"

# Column labels applied to each depth-dose file when it is read with pandas.
colnames = [
    'X',
    'Y',
    'Z',
    'Total Dose',
    'Total2',
    'Entries',
]

# NOTE(review): not referenced in the visible part of this file; looks like
# labels for an energy-plus-three-points table -- confirm before relying on it.
colnames2 = [
    'Energy',
    'p1',
    'p2',
    'p3',
]
def get_energy(file):
    """Return the energy encoded at the start of a data file name.

    Everything before the first "-" in *file* is parsed as a float, so
    "150-depthdose.txt" yields 150.0. A name without "-" is parsed whole.
    Raises ValueError if that leading text is not a valid number.
    """
    prefix, _, _ = file.partition("-")
    return float(prefix)
def get_subsamples(dir="data", n_points=3, threshold=5000, n_rows=299):
    """Cut every depth-dose file in *dir* into overlapping dose windows.

    Each ".txt" file in *dir* is read with pandas (first three lines skipped,
    next line treated as a header and replaced by the module-level
    ``colnames``). A window of ``n_points`` consecutive 'Total Dose' values
    is taken at every start index whose dose exceeds *threshold*; the window
    is divided by its own maximum and prefixed with the file's energy (see
    ``get_energy``).

    Parameters
    ----------
    dir : path of the folder holding the depth-dose files.
    n_points : number of consecutive dose values per window (>= 1).
    threshold : a window is kept only if its FIRST dose value is strictly
        greater than this.
    n_rows : nominal number of rows per file. Window start indices run over
        ``range(n_rows - n_points - 1)``, which reproduces the previously
        hard-coded value 299. The range is additionally clamped so a window
        never runs past the end of a shorter file. Pass ``None`` to use
        every full window each file can provide.

    Returns
    -------
    2-D float array of shape (n_windows, n_points + 1): column 0 is the
    energy, columns 1..n_points are the normalized doses. When nothing
    qualifies the array has shape (0, n_points + 1).

    Raises
    ------
    ValueError if ``n_points`` is smaller than 1.
    """
    if n_points < 1:
        raise ValueError("n_points must be at least 1")

    rows = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore any positional train/test split) reproducible.
    for entry in sorted(os.listdir(dir)):
        filename = os.fsdecode(entry)
        if not filename.endswith(".txt"):
            continue

        energy = get_energy(filename)
        df = pd.read_csv(os.path.join(dir, filename), skiprows=3, header=0, names=colnames)
        doses = df['Total Dose'].to_numpy(dtype=float)

        # Number of start positions that still leave room for a full window.
        n_starts = len(doses) - n_points + 1
        if n_rows is not None:
            n_starts = min(n_starts, n_rows - n_points - 1)

        for start in range(max(n_starts, 0)):
            if doses[start] > threshold:
                window = doses[start:start + n_points]
                # Normalize only the doses; the energy stays unscaled.
                rows.append(np.concatenate(([energy], window / window.max())))

    if not rows:
        return np.empty((0, n_points + 1))
    return np.array(rows)
def load_bp_data(split=75, dir="data", n_points=3, threshold=5000):
    """Return training and testing arrays built from the depth-dose files.

    The windows produced by ``get_subsamples(dir, n_points, threshold)`` are
    split by position: the first ``split`` percent of the rows become the
    training set and the remaining rows the testing set. No shuffling is done.

    Parameters
    ----------
    split : percentage (0-100) of the rows used for training.
    dir, n_points, threshold : forwarded unchanged to ``get_subsamples``.

    Returns
    -------
    (train_x, train_y, test_x, test_y) where the ``*_x`` arrays have shape
    (n, n_points) and hold the normalized dose windows, and the ``*_y``
    arrays have shape (n, 1) and hold the energy divided by the largest
    energy found in the whole data set.

    Raises
    ------
    ValueError if ``split`` is outside 0-100 or no window was produced.
    """
    if not 0 <= split <= 100:
        raise ValueError("split must be a percentage between 0 and 100")

    data = get_subsamples(dir, n_points, threshold)
    if data.size == 0:
        raise ValueError(
            "no depth-dose windows found in {!r} above threshold {}".format(dir, threshold)
        )

    e_max = np.max(data[:, 0])

    # Honour the requested percentage (previously fixed at 75) and give every
    # row to exactly one of the two sets (one row used to be dropped).
    n_train = int(split / 100 * data.shape[0])
    train = data[:n_train]
    test = data[n_train:]

    # Column 0 is the energy (target); columns 1..n_points are the features.
    train_x = train[:, 1:n_points + 1]
    train_y = train[:, [0]] / e_max
    test_x = test[:, 1:n_points + 1]
    test_y = test[:, [0]] / e_max
    return train_x, train_y, test_x, test_y
# Load 25-point dose windows whose first dose value exceeds 30000.
train_x, train_y, test_x, test_y = load_bp_data(n_points=25, threshold=30000)

# Transposed copy of the features (one column per sample), handy for plotting
# individual windows, e.g. plt.plot(train_xt[:, range(0, 100)]).
train_xt = np.transpose(train_x)

# load_bp_data returns targets with shape (n, 1); flatten them up front so the
# regressor is fitted on a 1-D target and the residuals below line up
# element by element with the 1-D predictions.
train_y = train_y.flatten()

reg = MLPRegressor(solver='sgd', alpha=1e-5, hidden_layer_sizes=(100, 100), random_state=1)
reg.fit(train_x, train_y)
predictions = reg.predict(train_x)

# Residuals on the training set. Subtracting test_y here (as before) mixed
# training predictions with testing targets of a different length.
diff = predictions - train_y

# Perfect predictions would fall on the diagonal of this plot.
plt.scatter(train_y, predictions)