-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathClassForest.ecl
95 lines (80 loc) · 2.89 KB
/
ClassForest.ecl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
IMPORT $;
IMPORT ML_Core.Preprocessing;
IMPORT ML_Core;
IMPORT LearningTrees AS LT;
IMPORT ML_Core.Discretize;
// Local shorthands for the two ML_Core field layouts.
DiscreteField := ML_Core.Types.DiscreteField;
NumericField  := ML_Core.Types.NumericField;

// Source dataset, taken from the File__AllData attribute in the current folder.
ASDDS := $.File__AllData.ASDDS;

// Working layout: sixteen columns, all carried as REAL8.
// ASD_traits is the last (16th) column.
ASDRec := RECORD
  REAL8 A1;
  REAL8 A2;
  REAL8 A3;
  REAL8 A4;
  REAL8 A5;
  REAL8 A6;
  REAL8 A7;
  REAL8 A8;
  REAL8 A9;
  REAL8 A10;
  REAL8 Sex;
  REAL8 Jaundice;
  REAL8 Family_mem_with_ASD;
  REAL8 Who_completed_the_test;
  REAL8 Age_Years_Normalized;
  REAL8 ASD_traits;
END;

// ASDRec extended with one unsigned column, rnd.
recn := RECORD(ASDRec)
  UNSIGNED rnd;
END;
// Project the dataset into the defined record structure
NewASDDS := PROJECT(ASDDS, ASDRec);
OUTPUT(NewASDDS, NAMED('NewASDDS'));

// Shuffle the records: attach a RANDOM() key to every row, then sort on it.
// RANDOM() is not seeded here, so the row order (and therefore the split)
// differs from run to run.
tab1  := PROJECT(ASDDS, TRANSFORM(recn, SELF.rnd := RANDOM(), SELF := LEFT));
tab1s := SORT(tab1, rnd);
//OUTPUT(tab1,NAMED('tab1'));
//OUTPUT(tab1s,NAMED('tab1SORT'));

// Number of shuffled rows that go to the training set.
trainSize := 2500;

// First trainSize rows train the model; every remaining row is held out for
// testing. The upper bound of the test slice is left open so that no rows are
// silently dropped if the source dataset grows beyond its current size.
train := PROJECT(tab1s[1..trainSize], ASDRec);
test  := PROJECT(tab1s[(trainSize + 1)..], ASDRec);
//OUTPUT(train, NAMED('train'));
//OUTPUT(test, NAMED('test'));
// Give every record in each partition a sequential id column named "id";
// the macros define trainid and testid.
ML_Core.AppendSeqID(train, id, trainid);
ML_Core.AppendSeqID(test,  id, testid);

// Convert the id-tagged records with ML_Core.ToField; the macros define
// trainNF and testNF, which are filtered on their "number" field further on.
ML_Core.ToField(trainid, trainNF);
ML_Core.ToField(testid,  testNF);

// Emit both forms of each partition so they can be inspected in the workunit.
OUTPUT(trainid, NAMED('Train'));
OUTPUT(testid,  NAMED('Test'));
OUTPUT(trainNF, NAMED('trainNF'));
OUTPUT(testNF,  NAMED('testNF'));
// Field number that holds the value to predict.
// NOTE(review): presumably ASD_traits, the 16th ASDRec column - confirm
// against ToField's field numbering.
labelFieldNo := 16;

// Independent (feature) cells: every field numbered below the label field.
indtrainData := trainNF(number < labelFieldNo);
indtestData  := testNF(number < labelFieldNo);

// Dependent (label) cells: the label field only, renumbered to field 1.
deptrainData := PROJECT(
  trainNF(number = labelFieldNo),
  TRANSFORM(RECORDOF(LEFT),
            SELF.number := 1,
            SELF := LEFT)
);
deptestData := PROJECT(
  testNF(number = labelFieldNo),
  TRANSFORM(RECORDOF(LEFT),
            SELF.number := 1,
            SELF := LEFT)
);

// Bare dataset expressions act as unnamed outputs of each partition.
indtrainData;
deptrainData;
indtestData;
deptestData;
// The dependent data holds class labels; turn those NumericField records
// into DiscreteField records by rounding.
depTrainDataDF := ML_Core.Discretize.ByRounding(depTrainData);
depTestDataDF  := ML_Core.Discretize.ByRounding(depTestData);

OUTPUT(depTrainDataDF, NAMED('depTrainDataDF'));
OUTPUT(depTestDataDF,  NAMED('depTestDataDF'));
// Classification forest created with the bundle's default parameters.
forest := LT.ClassificationForest();

// Fit on the training features; the labels must be the DiscreteField dataset.
fittedModel := forest.GetModel(indTrainData, depTrainDataDF);

// Classify the held-out feature rows with the fitted model.
testPredictions := forest.Classify(fittedModel, indTestData);

// Compare predictions with the true test labels (both DiscreteField datasets).
accuracyReport := ML_Core.Analysis.Classification.Accuracy(testPredictions, depTestDataDF);
OUTPUT(accuracyReport, NAMED('assesmentC'));

// Feature importance as reported by the fitted model.
featureRanks := forest.FeatureImportance(fittedModel);
OUTPUT(featureRanks, NAMED('Feature_Importance'));

// Confusion matrix for the test set; note the argument order is
// (model, actual labels, feature rows).
confMat := forest.ConfusionMatrix(fittedModel, depTestDataDF, indTestData);
OUTPUT(confMat, NAMED('Confusion_matrix'));