# AlzheimerCNN_BrainSight.py
import os
from datetime import datetime
from pathlib import Path
import boto3
import mlflow
import numpy as np
import pandas as pd
import splitfolders
import tensorflow as tf
import tensorflow_addons as tfa
from botocore.client import Config
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Conv2D, Dropout, MaxPool2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
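# The S3/MLflow configuration below assumes these environment variables are
# set: MLFLOW_S3_ENDPOINT_URL, AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.
# tensorflow_addons (tfa) also assumes a TensorFlow 2.x release it supports.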
# Create a connection to S3 using the boto3 library
s3 = boto3.resource('s3',
                    endpoint_url=os.getenv('MLFLOW_S3_ENDPOINT_URL'),
                    aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
                    aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
                    config=Config(signature_version='s3v4'),
                    region_name='us-east-1')
# Upload a timestamped copy of this script to the 'ipynb-py' S3 bucket
s3.Bucket('ipynb-py').upload_file(Path(__file__).name,
                                  str(datetime.now()) + Path(__file__).name)
# Set the variables for your environment
EXPERIMENT_NAME = "deep-learning-cnn-custom"
# Point the tracking URI at the Heroku-hosted MLflow application
mlflow.set_tracking_uri("https://mlflow.brainsight.tech")
# Set the active experiment
mlflow.set_experiment(EXPERIMENT_NAME)
# Get the experiment's info
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
# Call mlflow autolog, which records training params, per-epoch metrics and
# the fitted model on the tracking server without explicit log_* calls
mlflow.tensorflow.autolog()
# Start the experiment run
with mlflow.start_run(experiment_id=experiment.experiment_id):
    # Download the dataset archive from S3 and extract it under /content
    data_root_orig = tf.keras.utils.get_file(
        origin='https://brain-disease-detector.s3.eu-west-3.amazonaws.com/alzheimer_no_split_folders_renamed.zip',
        fname='/content',
        cache_subdir="/content",
        archive_format='zip',
        extract=True)
    # Get the class names from the extracted directory
    class_names = os.listdir('/content/alzheimer_no_split_folders_renamed')
    # Split the dataset into training, validation, and testing sets using the
    # splitfolders library
    OUTPUT_DIR = '/content/output_alzheimer_no_split_folders_renamed_v1'
    splitfolders.ratio('/content/alzheimer_no_split_folders_renamed',
                       output=OUTPUT_DIR,
                       ratio=(0.64, 0.16, 0.20))
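    # splitfolders creates train/, val/ and test/ subdirectories under
    # OUTPUT_DIR, each keeping the per-class folder structure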
    # Build a DataFrame of (split folder, class, file name) for every image
    paths = [path.parts[-3:]
             for path in Path(OUTPUT_DIR).rglob('*.jpg')]
    df = pd.DataFrame(data=paths, columns=['folder', 'class', 'file_name'])
    print(df.head(10))
    print(df.tail(10))
    # Print the number of images per folder/class and each split's class proportions
    print(df.groupby(['folder', 'class']).size())
    print(df[df["folder"] == "train"].groupby(["class"]).size() / len(df[df["folder"] == "train"]))
    print(df[df["folder"] == "val"].groupby(["class"]).size() / len(df[df["folder"] == "val"]))
    print(df[df["folder"] == "test"].groupby(["class"]).size() / len(df[df["folder"] == "test"]))
    # Define the ImageDataGenerator objects for the training, validation, and testing sets
    train_image_generator = ImageDataGenerator(rescale=1 / 255)
    val_image_generator = ImageDataGenerator(rescale=1 / 255)
    test_image_generator = ImageDataGenerator(rescale=1 / 255)
    # Create the training, validation, and testing datasets from the split
    # directories written by splitfolders above
    train_dataset = train_image_generator.flow_from_directory(
        batch_size=8,
        directory=OUTPUT_DIR + '/train',
        shuffle=True,
        target_size=(176, 208),
        class_mode='categorical')
    validation_dataset = val_image_generator.flow_from_directory(
        batch_size=8,
        directory=OUTPUT_DIR + '/val',
        shuffle=True,
        target_size=(176, 208),
        class_mode='categorical')
    # batch_size=1300 loads the whole test split in one batch; shuffle=False
    # keeps the prediction order aligned with the labels fetched below
    test_dataset = test_image_generator.flow_from_directory(
        batch_size=1300,
        directory=OUTPUT_DIR + '/test',
        shuffle=False,
        target_size=(176, 208))
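    # flow_from_directory infers the labels from the subfolder names and
    # yields (images, one-hot labels) batches indefinitely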
    # Get one batch of images and labels from each dataset
    train_images, train_labels = train_dataset.next()
    validation_images, validation_labels = validation_dataset.next()
    test_images, test_labels = test_dataset.next()
    # Print the class-to-index mapping used by the generators
    print(train_dataset.class_indices)
    print(test_dataset.class_indices)
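    # Optional sanity check (an addition, not part of the original script):
    # display the first image of the fetched training batch with its class
    # name; matplotlib is an extra, assumed dependency here
    import matplotlib.pyplot as plt
    idx_to_class = {v: k for k, v in train_dataset.class_indices.items()}
    plt.imshow(train_images[0])
    plt.title(idx_to_class[int(np.argmax(train_labels[0]))])
    plt.savefig('sample_train_image.png')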
    # Create the model
    model = tf.keras.Sequential([
        Conv2D(filters=16, kernel_size=(3, 3), strides=1, padding="same",
               activation="relu",
               input_shape=(176, 208, 3)),  # input shape: (height, width, channels)
        MaxPool2D(pool_size=2,  # the size of the pooling window
                  strides=2),   # the movement of the pooling on the input
        Dropout(0.1),
        Conv2D(filters=32, kernel_size=(3, 3), strides=1, padding="same",
               activation="relu"),
        MaxPool2D(2, 2),
        Dropout(0.1),
        Conv2D(filters=64, kernel_size=(3, 3), strides=1, padding="same",
               activation="relu"),
        MaxPool2D(2, 2),
        Dropout(0.1),
        tf.keras.layers.Flatten(),  # this layer flattens the feature maps into a vector
        tf.keras.layers.Dense(128, activation="relu"),  # the number of neurons in the layer
        Dropout(0.1),
        tf.keras.layers.Dense(64, activation="relu"),
        tf.keras.layers.Dense(4, activation="softmax")  # one output per class
    ])
    model.summary()
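    # Shape check (an addition): the three stride-2 poolings reduce 176x208 to
    # 22x26, so Flatten produces 22 * 26 * 64 = 36608 features for the dense layers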
    # Define the metrics and the early-stopping callback
    METRICS = [
        tf.keras.metrics.CategoricalAccuracy(name='acc'),
        tf.keras.metrics.AUC(name='auc'),
        tfa.metrics.F1Score(num_classes=4, average='macro', name='f1_score')
    ]
    callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                patience=3,
                                                restore_best_weights=False)
    initial_learning_rate = 0.0005
    # Define the learning rate schedule
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate,
        decay_steps=1000,
        decay_rate=0.96,
        staircase=True)
    # Compile the model
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
                  loss=tf.keras.losses.CategoricalCrossentropy(),
                  metrics=METRICS)
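    # With staircase=True the schedule decays in discrete steps:
    # lr(step) = 0.0005 * 0.96 ** (step // 1000), i.e. ~0.00048 after the
    # first 1000 optimizer updates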
    EPOCHS = 50
    # Train the model with the EarlyStopping callback defined above
    history = model.fit(train_dataset,
                        validation_data=validation_dataset,
                        shuffle=True,
                        epochs=EPOCHS,
                        callbacks=[callback])
    # Evaluation on the validation dataset
    _ = model.evaluate(validation_dataset)
    # Evaluation on the test dataset
    _ = model.evaluate(test_dataset)
    # Re-fetch the full test batch and predict on those exact images so the
    # predictions stay aligned with the labels
    test_images, test_labels = test_dataset.next()
    predicted_labels = model.predict(test_images)
    predicted_labels = np.argmax(predicted_labels, axis=1)
    test_labels = np.argmax(test_labels, axis=1)
    target_names = [k + ' : ' + str(v) for k, v in test_dataset.class_indices.items()]
    report = classification_report(test_labels, predicted_labels, target_names=target_names)
    print(report)
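    # Optional sketch (an addition, not in the original script): attach the
    # classification report to the MLflow run as a text artifact; the file
    # name is an arbitrary choice
    report_path = 'classification_report.txt'
    with open(report_path, 'w') as f:
        f.write(report)
    mlflow.log_artifact(report_path)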