-
Notifications
You must be signed in to change notification settings - Fork 1
/
data_loading.py
282 lines (230 loc) · 10.1 KB
/
data_loading.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
from google.colab import auth
from googleapiclient.discovery import build
from google.colab import drive
import os
import random
import time
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
from numpy.random import randn
from numpy.random import randint
import matplotlib.pyplot as plt
def folder_download(folder_id):
    """
    folder_download: Download a Google Drive folder into the current directory.
    Arguments:
        folder_id: Drive file id of the folder to fetch.
    Returns:
        folder_name: The name of the folder as reported by the Drive API.
    """
    # Authenticate the Colab user before talking to the Drive API.
    auth.authenticate_user()

    # Look up the folder's display name from its metadata.
    drive_service = build('drive', 'v3')
    metadata = drive_service.files().get(fileId=folder_id).execute()
    folder_name = metadata['name']

    # `download.py` has to be present next to this module; it can be fetched with
    #!wget -qnc https://github.com/segnolin/google-drive-folder-downloader/raw/master/download.py
    from download import download_folder

    download_folder(drive_service, folder_id, './', folder_name)
    return folder_name
def Data_Generation(args, df):
    """
    Data_Generation: Build and cache one preprocessed image array per attribute.

    For each of the attributes "Eyeglasses", "Rosy_Cheeks" and "Goatee" the rows
    of `df` whose attribute column equals 1 are selected. If at least 12000 rows
    match, exactly 12000 file names are sampled (fixed seed 1337); otherwise all
    matching file names are used. The images are resized to
    IMAGE_SIZE x IMAGE_SIZE, rescaled to [-1, 1], stored as float16 and saved as
    `training_data_{IMAGE_SIZE}_{IMAGE_SIZE}_{attribute}_{count}.npy`.
    A cache file that already exists is not rebuilt.

    Arguments:
        args: object providing NUMPY_FILES (directory of the .npy cache),
              IMAGE_SIZE (edge length in pixels) and DATA_PATH (image directory).
        df: pandas DataFrame with a "filename" column and one column per attribute.
    Returns:
        None. The arrays are only written to disk.
    """
    IMAGE_CHANNELS = 3
    MAX_EXAMPLES = 12000
    targets = ["Eyeglasses", "Rosy_Cheeks", "Goatee"]

    for col in targets:
        filenames = df[df[col] == 1]["filename"]
        # Only sample MAX_EXAMPLES many when there are actually that many instances
        if filenames.shape[0] >= MAX_EXAMPLES:
            filenames = filenames.sample(MAX_EXAMPLES, replace=False, random_state=1337)
        listdir_x = list(filenames)
        examplesize = len(listdir_x)
        print(f"Attribute {col} found {examplesize} examples.")

        training_binary_path = os.path.join(
            args.NUMPY_FILES,
            f'training_data_{args.IMAGE_SIZE}_{args.IMAGE_SIZE}_{col}_{examplesize}.npy')
        print(f"Looking for file: {training_binary_path}")

        if os.path.isfile(training_binary_path):
            print("Loading previous training pickle...")
            # The result is discarded (the function returns nothing); the load
            # only confirms that the cached file is readable.
            np.load(training_binary_path)
            continue

        start = time.time()
        print("Loading training images...")
        target_size = (args.IMAGE_SIZE, args.IMAGE_SIZE)
        training_data = []
        for filename in tqdm(listdir_x):
            path = os.path.join(args.DATA_PATH, filename)
            # `with` closes the file handle; without it thousands of handles
            # stay open until garbage collection.
            with Image.open(path) as image:
                # Force 3 channels so grayscale/RGBA files cannot break the
                # reshape below. Image.LANCZOS is the same filter the removed
                # Image.ANTIALIAS alias (gone in Pillow 10) referred to.
                resized = image.convert("RGB").resize(target_size, Image.LANCZOS)
                training_data.append(np.asarray(resized))

        training_data = np.reshape(
            training_data, (-1, args.IMAGE_SIZE, args.IMAGE_SIZE, IMAGE_CHANNELS))
        # float16 saves some space instead of float32
        training_data = training_data.astype(np.float16)
        # rescale to [-1,1]
        training_data = (training_data / 127.5) - 1

        print("Saving training image binary...")
        np.save(training_binary_path, training_data)
        elapsed = time.time() - start
        print('Image preprocess time:', elapsed)
def Load_Data(args):
    """
    Load_Data: Load the images and labels for training.

    Reads the cached arrays `training_data_128_128_{class}_12000.npy` for the
    three classes from args.NUMPY_FILES and stacks them along the first axis.
    The file names (image size 128, 12000 examples) are fixed here and must
    match what was written to the cache directory.

    Arguments:
        args: parser which contains all the variables and paths
              (only NUMPY_FILES is read).
    Returns:
        X: Input training images, all classes concatenated in class order.
        y: Input training labels; the class index (0, 1, 2) of every row in X.
    """
    # This would be the class header for 40 classes
    #CLASS_HEADER = list(df.columns)[1:]
    # For this experiment we only have 3 classes
    CLASS_HEADER = ["Eyeglasses", "Rosy_Cheeks", "Goatee"]

    images = []
    labels = []
    for class_index, klasse in enumerate(CLASS_HEADER):
        data = np.load(os.path.join(args.NUMPY_FILES,
                                    f"training_data_128_128_{klasse}_12000.npy"))
        images.append(data)
        # One label per row actually loaded, so X and y can never disagree in
        # length even if a cache file holds a different number of examples.
        labels.append(np.array([class_index] * data.shape[0]))

    # Concatenate once at the end: growing X inside the loop re-copies the
    # already loaded (large) arrays on every iteration.
    X = np.concatenate(images)
    y = np.concatenate(labels)
    return X, y
# Nicely formatted time string
def hms_string(sec_elapsed):
    """
    hms_string: Format a duration as "H:MM:SS.ss".
    Arguments:
        sec_elapsed: duration in seconds.
    Returns:
        String with unpadded hours, two-digit minutes and seconds with two decimals.
    """
    seconds_per_hour = 60 * 60
    hours = int(sec_elapsed / seconds_per_hour)
    minutes = int((sec_elapsed % seconds_per_hour) / 60)
    seconds = sec_elapsed % 60
    return f"{hours}:{minutes:>02}:{seconds:>05.2f}"
# select real samples
def generate_real_samples(X, y, n_samples):
    """
    generate_real_samples: Draw a random batch of real images with their labels.
    Arguments:
        X: array of images, indexed along the first axis.
        y: array of class labels aligned with X.
        n_samples: number of rows to draw (with replacement).
    Returns:
        [images, labels]: the selected rows of X and y.
        target: array of shape (n_samples, 1) filled with 1.
    """
    # random row indices in [0, number of images)
    picked = randint(0, X.shape[0], n_samples)
    batch = [X[picked], y[picked]]
    # every drawn sample gets the target value 1
    real_target = np.ones((n_samples, 1))
    return batch, real_target
# select real samples as suggested in the literature
def generate_real_samples_smoothed(X, y, n_samples):
    """
    generate_real_samples_smoothed: Draw a random batch of real images with
    smoothed targets (0.9 instead of 1).
    Arguments:
        X: array of images, indexed along the first axis.
        y: array of class labels aligned with X.
        n_samples: number of rows to draw (with replacement).
    Returns:
        [images, labels]: the selected rows of X and y.
        target: array of shape (n_samples, 1) filled with 0.9.
    """
    smoothed_value = 0.9  # change here if needed
    # random row indices in [0, number of images)
    picked = randint(0, X.shape[0], n_samples)
    images, labels = X[picked], y[picked]
    target = np.full((n_samples, 1), smoothed_value)
    return [images, labels], target
# generate points in latent space as input for the generator
def generate_latent_points(latent_dim, n_samples, args):
    """
    generate_latent_points: Sample generator inputs (latent vectors and labels).
    Arguments:
        latent_dim: length of one latent vector.
        n_samples: number of latent vectors to draw.
        args: object providing NUMBER_OF_CLASSES.
    Returns:
        [z_input, labels]: standard-normal array of shape (n_samples, latent_dim)
        and n_samples random integer labels in [0, NUMBER_OF_CLASSES).
    """
    class_count = args.NUMBER_OF_CLASSES
    # draw all values in one flat vector, then shape it into a batch
    z_input = randn(latent_dim * n_samples).reshape(n_samples, latent_dim)
    # one random class label per latent vector
    labels = randint(0, class_count, n_samples)
    return [z_input, labels]
# generate points in latent space for saving progress
def generate_latent_points_fix(latent_dim, n_classes):
    """
    generate_latent_points_fix: Sample one latent vector per class.
    Arguments:
        latent_dim: length of one latent vector.
        n_classes: number of classes; also the number of vectors drawn.
    Returns:
        z_input: standard-normal array of shape (n_classes, latent_dim).
        labels: array holding the labels 0 .. n_classes-1 in order.
    """
    # draw all values in one flat vector, then shape it into a batch
    flat_noise = randn(latent_dim * n_classes)
    z_input = flat_noise.reshape(n_classes, latent_dim)
    # exactly one sample for every class label
    labels = np.array([*range(n_classes)])
    return z_input, labels
# use the generator to generate n fake examples, with class labels
def generate_fake_samples(generator, latent_dim, n_samples, args):
    """
    generate_fake_samples: Let the generator produce a batch of fake images.
    Arguments:
        generator: model whose predict([z, labels]) returns the images.
        latent_dim: length of one latent vector.
        n_samples: number of images to generate.
        args: forwarded to generate_latent_points.
    Returns:
        [images, labels]: generator output and the labels it was conditioned on.
        y: array of shape (n_samples, 1) filled with 0.
    """
    # random latent vectors plus a random class label for each of them
    latent_batch = generate_latent_points(latent_dim, n_samples, args)
    noise, class_labels = latent_batch
    #print(f"SAFE. z_input: {z_input.shape}, labels_input:{labels_input.shape}")
    fake_images = generator.predict([noise, class_labels])
    # every generated sample gets the target value 0
    fake_target = np.zeros((n_samples, 1))
    return [fake_images, class_labels], fake_target
# here for the model with only 3 classes
def save_progress_full(generator, z, label, epoch, class_header, args, prefix=""):
    """
    save_progress_full: Save a 1x3 grid with one generated image per class.
    Arguments:
        generator: model whose predict([z, label]) returns the images.
        z: latent vectors passed to the generator.
        label: class labels passed to the generator.
        epoch: epoch number, used in the figure title and the file name.
        class_header: class names; entry i becomes the title of image i.
        args: object providing FULL_SAVE (output directory).
        prefix: optional file-name prefix.
    """
    # switch off every tick and tick label
    hidden_ticks = dict(axis='both', which='both', bottom=False, top=False,
                        labelbottom=False, labelleft=False, labelright=False,
                        labeltop=False, left=False, right=False)
    fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15,7))
    gen_imgs = generator.predict([z,label])
    for idx, ax in enumerate(axes):
        # x*0.5+0.5 maps [-1, 1] onto [0, 1] (assumes generator output in [-1, 1])
        ax.imshow(gen_imgs[idx] * 0.5 + 0.5)
        ax.set_title(f"{class_header[idx]}")
        ax.tick_params(**hidden_ticks)
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.suptitle(f"Epoch: {epoch}")
    target_file = os.path.join(args.FULL_SAVE, f"{prefix}facegen_epoch_{epoch}_full.png")
    plt.savefig(target_file, dpi=300)
    #plt.show()
    plt.close(fig=fig)
# here for the model with all 40 classes
def save_progress_full_40(generator, z, label, epoch, class_header, args, prefix=""):
    """
    save_progress_full_40: Save an 8x5 grid with one generated image per class.
    Arguments:
        generator: model whose predict([z, label]) returns the images.
        z: latent vectors passed to the generator.
        label: class labels passed to the generator.
        epoch: epoch number, used in the figure title and the file name.
        class_header: class names; entry i becomes the title of image i.
        args: object providing FULL_SAVE (output directory).
        prefix: optional file-name prefix.
    """
    # switch off every tick and tick label
    hidden_ticks = dict(axis='both', which='both', bottom=False, top=False,
                        labelbottom=False, labelleft=False, labelright=False,
                        labeltop=False, left=False, right=False)
    fig, axes = plt.subplots(nrows=8, ncols=5, figsize=(20,20))
    gen_imgs = generator.predict([z,label])
    # .flat walks the grid row by row, left to right
    for idx, ax in enumerate(axes.flat):
        # x*0.5+0.5 maps [-1, 1] onto [0, 1] (assumes generator output in [-1, 1])
        ax.imshow(gen_imgs[idx] * 0.5 + 0.5)
        ax.set_title(f"{class_header[idx]}")
        ax.tick_params(**hidden_ticks)
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.suptitle(f"Epoch: {epoch}")
    target_file = os.path.join(args.FULL_SAVE, f"{prefix}facegen_epoch_{epoch}_full.png")
    plt.savefig(target_file, dpi=300)
    #plt.show()
    plt.close(fig=fig)
# independent of amount of classes
def save_progress_variety(generator, z, label, epoch, args, prefix="", classname=""):
    """
    save_progress_variety: Save a 4x4 grid of generated images for one class.
    Arguments:
        generator: model whose predict([z, label]) returns the images.
        z: latent vectors passed to the generator.
        label: class labels passed to the generator.
        epoch: epoch number, used in the figure title and the file name.
        args: object providing CLASSES_SAVE (output directory).
        prefix: optional file-name part placed after the class name.
        classname: class name used in the title and the file name.
    """
    # switch off every tick and tick label
    hidden_ticks = dict(axis='both', which='both', bottom=False, top=False,
                        labelbottom=False, labelleft=False, labelright=False,
                        labeltop=False, left=False, right=False)
    fig, axes = plt.subplots(nrows=4, ncols=4, figsize=(15,15))
    gen_imgs = generator.predict([z,label])
    # ravel() lists the axes row by row, left to right
    for idx, ax in enumerate(axes.ravel()):
        # x*0.5+0.5 maps [-1, 1] onto [0, 1] (assumes generator output in [-1, 1])
        ax.imshow(gen_imgs[idx] * 0.5 + 0.5)
        ax.tick_params(**hidden_ticks)
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.suptitle(f"{classname} on epoch: {epoch}")
    target_file = os.path.join(args.CLASSES_SAVE, f"{classname}_{prefix}epoch_{epoch}.png")
    plt.savefig(target_file, dpi=300)
    plt.close(fig=fig)
    #plt.show()
"""
Weights MUST HAVE layout: "prefix_cgan_{generator, discriminator}_epoch_X.h5"
Actually DEPRECATED, because weights are loaded manually to ensure everything goes right
"""
def load_latest_model(d, g, path, prefix=""):
    """
    load_latest_model: Load the newest generator/discriminator weight pair.

    Weight files must be named
    "{prefix}_cgan_{generator|discriminator}_epoch_{N}.h5". The highest epoch N
    for which BOTH files exist in `path` is loaded, so the two networks are
    always restored from the same epoch. Files that do not follow the naming
    scheme are ignored.

    Arguments:
        d: discriminator model; must provide load_weights(filepath).
        g: generator model; must provide load_weights(filepath).
        path: directory that contains the weight files.
        prefix: leading part of the file names (may contain underscores).
    Returns:
        The epoch whose weights were loaded. When no complete pair is found the
        epoch is 0 and the epoch-0 files are requested, so load_weights reports
        the missing file.
    """
    suffix = ".h5"
    epochs = {"generator": set(), "discriminator": set()}
    for name in os.listdir(path):
        if not name.endswith(suffix):
            continue
        for kind, found in epochs.items():
            head = f"{prefix}_cgan_{kind}_epoch_"
            if not name.startswith(head):
                continue
            # Whatever sits between the fixed head and ".h5" must be the epoch.
            number = name[len(head):-len(suffix)]
            if number.isdecimal():
                found.add(int(number))

    # Only epochs saved for both networks are candidates; loading a generator
    # and a discriminator from different epochs would mix training states.
    complete = epochs["generator"] & epochs["discriminator"]
    latest = max(complete, default=0)

    g.load_weights(os.path.join(path, f"{prefix}_cgan_generator_epoch_{latest}.h5"))
    d.load_weights(os.path.join(path, f"{prefix}_cgan_discriminator_epoch_{latest}.h5"))
    print(f"Loaded weights for epoch: {latest}")
    return latest
def plot_random_with_discr(d_model, g_model, epoch, args, latent_dim=100, save_pref=""):
    """
    plot_random_with_discr: Plot one generated image per class together with the
    discriminator output for it.

    Arguments:
        d_model: discriminator; predict([images, labels]) must return one row
                 per image whose first entry is shown as a percentage.
        g_model: generator; predict([z, labels]) must return the images.
        epoch: epoch number, used in the figure title and the file name.
        args: object providing DISCR_SAVE (output directory).
        latent_dim: length of one latent vector.
        save_pref: file-name prefix; the figure is only saved when it is non-empty.
    """
    # The class names were previously read from a module-level CLASS_HEADER that
    # this file never defines, which raised NameError; keep them local instead.
    class_header = ["Eyeglasses", "Rosy_Cheeks", "Goatee"]

    z, label = generate_latent_points_fix(latent_dim, n_classes=len(class_header))
    # Run the generator once and reuse the images for plotting and scoring.
    gen_imgs = g_model.predict([z, label])
    d_pred = d_model.predict([gen_imgs, label])

    fig, axes = plt.subplots(nrows=1, ncols=len(class_header), figsize=(15,7))
    for i, ax in enumerate(axes):
        # x*0.5+0.5 maps [-1, 1] onto [0, 1] (assumes generator output in [-1, 1])
        ax.imshow(gen_imgs[i] * 0.5 + 0.5)
        ax.set_title(f"{class_header[i]}: {np.round(d_pred[i][0]*100, decimals=2)}%")
        ax.tick_params(axis='both',which='both',bottom=False,top=False,labelbottom=False, labelleft=False, labelright=False)
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.suptitle(f"Epoch {epoch} - Descriminator probabilities of being a real image")
    if save_pref:
        # Join directory and file name as separate components; plain string
        # concatenation dropped the separator when DISCR_SAVE had no trailing slash.
        out_file = os.path.join(args.DISCR_SAVE, f"{save_pref}discriminator_epoch_{epoch}.png")
        plt.savefig(out_file, dpi=300)
    #plt.show()
    plt.close(fig)