-
Notifications
You must be signed in to change notification settings - Fork 0
/
createCampaignDiscreteRegression.py
140 lines (110 loc) · 4.9 KB
/
createCampaignDiscreteRegression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# createCampaignDiscreteRegression provides definitions for:
# 1) data collection functions, used to obtain all currently existing data,
# the cost of collecting a specific set of experimental data, or a specific
# set of new experimental data,
# 2) the types of dependent (measured) variables that can be returned,
# 3) the types and ranges of independent (experimental) variables to be explored,
# 4) the set of definitions for model construction functions,
# 5) the active learning function to be used, and,
# 6) the objective function to be optimized along with the termination criteria.
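# The arguments to main() specify the number of independent variables to create,
# the number of values per variable, the amount of added noise, and the number
# of experiments per round (see the argument list above main()).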
# Useful library for handling iterators
import itertools
# Important for storing results and history of campaign runs on disk or on a remote server
from database import *
import datetime
# directories
import os
# Imported explicitly (and after the star import) so that `np` does not depend
# on whatever names `database` happens to re-export.
import numpy as np
import util
#arguments:
# Number of Independent Variables
# Number of Values per Independent Variable
# Amount of Added Noise
# Number of Experiments per Round
def main(*arg):
    """Build and return a campaign object for a discrete linear-regression run.

    The positional arguments are read from ``arg`` by index:

        arg[0]: number of independent variables
        arg[1]: number of values per independent variable; used as the upper
            bound of each variable's integer range ``('int', 0, arg[1])``
        arg[2]: amount of added noise, forwarded to
            ``generate_ground_truth_linear``
        arg[3]: number of experiments per round

    Any additional positional arguments are ignored.

    Returns:
        An instance of the locally defined ``campaign`` class. Its class
        attributes hold the experimental-space specification (``ESS``), the
        ``Database`` object, the generated ground truth and plot settings.

    Raises:
        IndexError: if fewer than four positional arguments are supplied.
        ValueError: if the number of independent variables is below 1.
    """
    NumberOfIvars = arg[0]
    NumberPerIvar = arg[1]
    AddedNoise = arg[2]
    ExperimentsPerRound = arg[3]

    # Reject an empty experimental space before any campaign state
    # (tuples, database, ground truth) is built.
    if NumberOfIvars < 1:
        raise ValueError("Number of Independent Variables must be positive:" + str(NumberOfIvars))

    class campaign:
        def initAvars(self):
            return

        # this goal function stops when min confidence is >0.9 and
        # R2 is greater than 0.9
        def goalTether(self, arg):
            # Aliased so the import does not shadow the enclosing main().
            from tetherCont import main as tether_main
            return tether_main(arg)

        # this modeler fits a linear model and estimates confidence
        # in predictions from distance to nearest sampled point
        def modelData(self, *arg):
            from contModel import main as model_main
            return model_main(*arg)

        # this active learner chooses randomly among the points
        # with estimated accuracy below 0.9 and stops when there are
        # not enough points to fill a batch
        def activeLearner(self, arg):
            from actLearn_contModularized import main as learner_main
            return learner_main(arg)

        def fetchData(self, arg1, arg2):
            from getGroundTruthData import main as fetch_main
            return fetch_main(arg1, arg2)

        # Declared static because it takes no ``self``; this lets it be
        # called on the class and on the instance returned by main().
        @staticmethod
        def provideNames():
            print("iVarNames:")
            print(campaign.ESS.iVarNames)
            print("iVarCategories:")
            print(campaign.ESS.iVarCategories)

        class ESS:
            # define data specifications
            # trailing comma creates tuple of tuples
            iVars = (('int', 0, NumberPerIvar),) * NumberOfIvars
            iVarNames = ["iVar" + str(i) for i in range(NumberOfIvars)]
            iVarCategories = [()] * NumberOfIvars
            dVars = [('int', -50, 50)]
            listTuples = util.createTuples(iVars)
            # To dump the tuples for inspection:
            # listName = 'listTuples' + str(NumberOfIvars) + '_' + str(datetime.datetime.now()) + '.txt'
            # np.savetxt(listName, listTuples, fmt="%s")

            # Size of each axis: the inclusive span (upper - lower + 1) of
            # the corresponding independent variable's range.
            dimarr = [upper - lower + 1 for _, lower, upper in iVars]

        # constraints on data collection function(s), e.g. QC
        # (placeholder, currently inactive):
        # class acquisitionOptions:
        #     replicates = []  # 4. Data acquisition function
        #     tolerance = []   # 4. Data acquisition function
        #     cost = [0]       # 4. Data acquisition function
        #     getcurrent = []  # 5. Data access object

        # all data is initialized as np.nan inside database.py
        data = Database(ESS.dimarr,
                        'linreg_test_' + str(NumberOfIvars) + '_' +
                        str(datetime.datetime.now()), 0, reset=True)
        confCount = ExperimentsPerRound

        # define the coefficients for generating data
        betas = [(i + 2) ** 2 for i in range(NumberOfIvars)]
        groundTruth = generate_ground_truth_linear(ESS.listTuples, betas, AddedNoise)

        class plotting:
            title = "Accuracy as Fraction of Experimental Space Coverage Increased"
            xlabel = "Experimental Space Coverage"
            ylabel = "Accuracy"
            filename = "BioActive_LinReg(" + str(NumberOfIvars) + "iVar)Campaign_batchsize" + str(ExperimentsPerRound) + ".tif"
            # Output directory name is stamped with the creation time.
            intDir = 'simsDirectory_' + str(datetime.datetime.now()) + '/'
            filename = intDir + filename

    C = campaign()
    return C
def generate_ground_truth_linear(listTuples, betas, AddedNoise, rng=None):
    """Return a noisy linear response for every row of ``listTuples``.

    Each output value is ``sum(betas[j] * listTuples[i][j])`` over the columns
    of row ``i``. If ``AddedNoise`` is non-zero, standard-normal noise scaled
    by ``AddedNoise`` is added to it.

    Args:
        listTuples: 2-D array-like with one row per experimental point and one
            column per independent variable.
        betas: sequence of coefficients, at least one per column. Entries
            beyond the number of columns are ignored.
        AddedNoise: scale factor applied to the standard-normal noise. ``0``
            disables noise, and no random numbers are drawn.
        rng: optional seed or ``numpy.random.Generator`` used for the noise.
            The default ``None`` draws fresh, non-reproducible noise, which is
            the behaviour callers had before this parameter existed.

    Returns:
        1-D float64 ``numpy.ndarray`` with one value per row.

    Raises:
        IndexError: if ``listTuples`` is not 2-D, or ``betas`` has fewer
            entries than ``listTuples`` has columns.
    """
    points = np.asarray(listTuples, dtype=float)
    n_cols = points.shape[1]
    if len(betas) < n_cols:
        raise IndexError(
            "betas has %d coefficient(s) but listTuples has %d column(s)"
            % (len(betas), n_cols)
        )

    # One matrix-vector product replaces the per-row Python loop.
    coeffs = np.asarray(betas[:n_cols], dtype=float)
    y = points @ coeffs

    if AddedNoise != 0:
        # default_rng passes an existing Generator through unchanged and
        # builds a new one from None or an integer seed.
        generator = np.random.default_rng(rng)
        y = y + generator.standard_normal(points.shape[0]) * AddedNoise
    return y