-
Notifications
You must be signed in to change notification settings - Fork 3
/
dataset.py
96 lines (78 loc) · 3.31 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# Import necessary libraries
import os
import glob
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import RandomFlip, RandomRotation, Dense, Dropout, Input
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input as pp_i
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
# Define dataset path
images_dir = "assets/planate dataset"  # Replace with the actual path to dataset

# Load and label image names.
# Every JPEG stored one directory below `images_dir` is collected; the name of
# that directory is used as the class label. glob returns paths in arbitrary,
# OS-dependent order, so the list is sorted to give every run the same order.
image_names = sorted(glob.glob(os.path.join(images_dir, '*', '*.jpg')))
labels = [os.path.basename(os.path.dirname(name)) for name in image_names]

# Encode labels into integers for classification.
# The distinct labels are sorted instead of relying on set iteration order,
# which can differ between interpreter runs (string hash randomization); this
# pins every class name to a stable integer index.
unique_labels = sorted(set(labels))
label_dict = {name: idx for idx, name in enumerate(unique_labels)}
def label_encoder(label):
    """Return the integer index stored in ``label_dict`` for ``label``.

    A label that has no entry in ``label_dict`` yields ``None`` instead of
    raising.
    """
    try:
        return label_dict[label]
    except KeyError:
        return None
# Load images and encode labels
IMAGE_SIZE = (224, 224)
features = []
labels_encoded = []
for image_path in image_names:
    # The class name is the directory that directly contains the image.
    class_name = os.path.basename(os.path.dirname(image_path))
    class_index = label_encoder(class_name)
    if class_index is None:
        # No integer code is known for this directory name: leave the image out.
        continue
    # Read the file resized to IMAGE_SIZE, convert it to an array, and apply
    # the ResNet50 input preprocessing imported as `pp_i`.
    pixels = pp_i(img_to_array(load_img(image_path, target_size=IMAGE_SIZE)))
    features.append(pixels)
    labels_encoded.append(class_index)
# Convert features and labels to numpy arrays
features_array = np.array(features)
labels_array = np.array(labels_encoded)

# Fail early with an actionable message: with no images loaded, the split
# below would otherwise raise a less specific error about the sample count.
if features_array.size == 0:
    raise ValueError(
        f"No images were loaded from {images_dir!r}; check the dataset path."
    )

# One-hot encode labels.
# The dtype is requested explicitly because the default dtype produced by
# pd.get_dummies depends on the pandas version (bool from pandas 2.0 on),
# while the categorical cross-entropy targets are meant to be floating point.
label_one_hot = pd.get_dummies(labels_array, dtype=np.float32).to_numpy()

# Split data into train, validation, and test sets: 20% is held out for
# testing, then 20% of the remainder is held out for validation. The fixed
# random_state keeps both splits repeatable for the same input order.
x_train, x_test, y_train, y_test = train_test_split(
    features_array, label_one_hot, test_size=0.2, random_state=42
)
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)
# Data augmentation and model definition
# Random flips (both axes) and random rotations, applied as layers inside the
# model.
data_augmentation = Sequential([
    RandomFlip("horizontal_and_vertical"),
    RandomRotation(0.2),
])

# ImageNet-pretrained ResNet50 without its classification head; with
# pooling='avg' the backbone outputs one pooled feature vector per image.
# The backbone is frozen so that only the layers added below are trained.
resnet_model = ResNet50(include_top=False, pooling='avg', weights='imagenet')
resnet_model.trainable = False

# Build model
# The input shape is derived from IMAGE_SIZE (the size the images are loaded
# at) instead of repeating the literal, so the two cannot drift apart.
inputs = Input(shape=(*IMAGE_SIZE, 3))
x = data_augmentation(inputs)
# training=False runs the frozen backbone in inference mode.
x = resnet_model(x, training=False)
x = Dropout(0.2)(x)
# One softmax output unit per distinct class label.
outputs = Dense(len(unique_labels), activation='softmax')(x)
model = tf.keras.Model(inputs, outputs)

# Compile model
# categorical_crossentropy matches the one-hot encoded targets.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# Train model
EPOCHS = 10
model_history = model.fit(
    x_train, y_train, validation_data=(x_val, y_val), epochs=EPOCHS
)

# Plot accuracy and loss
history = model_history.history
# The x-axis is sized from the recorded history rather than from a second
# hard-coded epoch count, so the curves always line up with the number of
# epochs that actually ran.
epochs_range = range(len(history['loss']))

plt.figure(figsize=(15, 6))

# Left panel: accuracy per epoch.
plt.subplot(1, 2, 1)
plt.plot(epochs_range, history['accuracy'], label='Training Accuracy')
plt.plot(epochs_range, history['val_accuracy'], label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss per epoch.
plt.subplot(1, 2, 2)
plt.plot(epochs_range, history['loss'], label='Training Loss')
plt.plot(epochs_range, history['val_loss'], label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')

plt.show()
# Evaluate model
# The model is compiled with a single metric ('accuracy'), so evaluate()
# returns the loss followed by the accuracy. Keep and report both instead of
# discarding the return value.
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f"Test loss: {test_loss:.4f} - test accuracy: {test_accuracy:.4f}")

# Prediction example
# Each row of y_pred holds the softmax output for one test image.
y_pred = model.predict(x_test)
# A bare `y_pred` expression only displays something in a notebook cell;
# printing makes the result visible when the file is run as a script too.
print(y_pred)