-
Notifications
You must be signed in to change notification settings - Fork 0
/
day_7_dhruvdhayal_ai_ml.py
141 lines (104 loc) · 4.63 KB
/
day_7_dhruvdhayal_ai_ml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# -*- coding: utf-8 -*-
"""Day-7_DHRUVDHAYAL_AI/ML.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1npzLgzx3jal5ATGM0UjBN9wa1N6P346t
#Decision Tree Making!
"""
# Using decision tree concepts.
# Import all the required libraries.
from sklearn import datasets;
from sklearn import svm, tree;
from sklearn import metrics;
from sklearn import model_selection;
import matplotlib.pyplot as plt;
import numpy as np;
import pandas as pd;
# ---------------------------------------------------------------------------
# Part 1: decision tree classifier on the Iris dataset.
# ---------------------------------------------------------------------------
# Load the Iris dataset that ships with scikit-learn and show what was loaded.
data = datasets.load_iris()
print("\n 1. Type of the Data Represented: ", type(data))
print("\n 2. Description of the Data complete: ", data)

# Separate the attributes (X) from the labels (y).
X = data.data
y = data.target
print("\n 1. Type of the Attributes: ", type(X))
print("\n 2. Total Length of the Attributes: ", len(X))
# The labels are bound to lowercase `y`; referencing `Y` here raised NameError.
print("\n 3. Type of the Labels: ", type(y))
print("\n 4. Total Length of the Labels: ", len(y))

# Hold out `ratio` of the samples for testing; the fixed random_state keeps
# the split identical from run to run.
ratio = 0.3
Xtrain, Xtest, ytrain, ytest = model_selection.train_test_split(
    X, y, test_size=ratio, random_state=7
)
print("\n 1. Training Data: ", Xtrain.shape, " ", ytrain.shape)
# Report the *test* label shape next to the test attributes (was ytrain.shape).
print("\n 2, Testing Data: ", Xtest.shape, " ", ytest.shape)

# Build the classifier (entropy split criterion, max_depth=None) and fit it
# on the training split. fit() returns the estimator itself.
clff_model = tree.DecisionTreeClassifier(criterion='entropy', max_depth=None)
clff_model = clff_model.fit(Xtrain, ytrain)

# Predict on both splits so training and testing accuracy can be compared.
ypredTrain = clff_model.predict(Xtrain)
ypredTest = clff_model.predict(Xtest)
print("\n --> Predicted Training Data : ", ypredTrain.shape)
print("\n --> Predicted Testing Data : ", ypredTest.shape)

# accuracy_score takes (y_true, y_pred); the score is the same either way,
# but the documented order is used here.
accTrain = metrics.accuracy_score(ytrain, ypredTrain)
accTest = metrics.accuracy_score(ytest, ypredTest)
print("\n --> Training Accuracy: ", accTrain)
print("\n --> Testing Accuracy: ", accTest)

# Draw the fitted tree; filled=True colours the nodes.
import matplotlib.pyplot as plt
plt.figure(1, figsize=(5, 10))
tree.plot_tree(clff_model, filled=True)
plt.show()
"""# >> Now, we need to perform the decision tree to find the actual values on the Diabetes Datasets."""
#Importing all the built in Libraries in sklearn.
from sklearn import datasets;
from sklearn import svm;
from sklearn import metrics;
from sklearn import model_selection;
# Import the other libraries.
import matplotlib.pyplot as plt;
#import matplotlib.image as plt;
import pandas as pd;
import numpy as np;
# ---------------------------------------------------------------------------
# Part 2: decision tree classifier on the diabetes CSV.
# ---------------------------------------------------------------------------
# Read the dataset from the CSV file.
# NOTE(review): the path is hard-coded to the Colab filesystem -- adjust it
# when running elsewhere.
path = '/content/diabetes.csv'
data = pd.read_csv(path)

# Show what was loaded: container type, shape, and the full table.
print("\n 1. Type of the Information Available in the Datasets of Diabetes: ", type(data))
print("\n 2. Complete Description of the Data Given Here: ", data.shape)
print("\n------------------------------------------------------------------------------------")
print("\n 3. Show the Data with Descriptions: \n\n", data)

# Every column except 'Outcome' is an attribute; 'Outcome' is the label.
# drop() returns a new frame, so `data` itself is left unchanged.
x = data.drop(columns='Outcome')
y = data['Outcome']

# Hold out `ratio` of the rows for testing; the fixed random_state keeps
# the split identical from run to run.
ratio = 0.3
Xtrain, Xtest, ytrain, ytest = model_selection.train_test_split(
    x, y, test_size=ratio, random_state=7
)
print("\n 1. Training Data Values: ", Xtrain.shape, " ", ytrain.shape)
print("\n 2. Testing Data Values: ", Xtest.shape, " ", ytest.shape)

# Build the classifier (entropy split criterion, depth capped at 5) and fit
# it on the training split. fit() returns the estimator itself.
clff_model = tree.DecisionTreeClassifier(criterion='entropy', max_depth=5)
clff_model = clff_model.fit(Xtrain, ytrain)

# Predict on both splits so training and testing accuracy can be compared.
ypredTrain = clff_model.predict(Xtrain)
ypredTest = clff_model.predict(Xtest)
print("\n --> Traning Data Prediction: ", ypredTrain.shape)
print("\n --> Testing Data Prediction: ", ypredTest.shape)

# accuracy_score takes (y_true, y_pred); the score is the same either way,
# but the documented order is used here.
accTrain = metrics.accuracy_score(ytrain, ypredTrain)
accTest = metrics.accuracy_score(ytest, ypredTest)
print("\n --> Training Accuracy: ", accTrain)
print("\n --> Testing Accuracy: ", accTest)

# Draw the fitted tree. plt.show() is required for the figure to appear when
# this file is run as a script, as the Iris section above already does.
plt.figure(1)
tree.plot_tree(clff_model, filled=True)
plt.show()
"""#Now, we make the direct acess data from the Google Drive Directly!"""