-
Notifications
You must be signed in to change notification settings - Fork 0
/
oxtfinput.py
283 lines (215 loc) · 9.27 KB
/
oxtfinput.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
import os
from matplotlib.image import imread
import tensorflow as tf
import numpy as np
import dataset
# https://www.youtube.com/watch?v=oxrcZ9uUblI funny and good tutorial on tensorflow records
def create_tfrecords(name='Cifar10', datadir=os.path.expanduser('~/Cifar10')):
    """Convert a supported dataset into train and test TFRecord files.

    Args:
        name: Name of the dataset. 'OxfordFlower' and 'Cifar10' are
            handled; any other value writes nothing and only the target
            paths are returned.
        datadir: Directory to store data in. Both record files are
            written directly inside it.

    Returns:
        Tuple ``(train_record_path, test_record_path)``.
    """
    # Targets for the efficient TensorFlow record files.
    train_record_path = os.path.join(datadir, 'train.tfrecord')
    test_record_path = os.path.join(datadir, 'test.tfrecord')

    if name == 'OxfordFlower':
        # Oxford is already stored as JPEG files, so we convert from paths.
        data = dataset.Dataset(name, datadir)
        convert(*data.get_train_data(), train_record_path)
        convert(*data.get_test_data(), test_record_path)
    elif name == 'Cifar10':
        # Cifar10 has to be read from its pregenerated batches.
        convert_from_data(
            dataset.yield_cifar_10_dataset(datadir, train=True),
            train_record_path
        )
        # The test record must be built from the held-out data; the
        # original requested train=True here, duplicating the training set.
        convert_from_data(
            dataset.yield_cifar_10_dataset(datadir, train=False),
            test_record_path
        )
    return train_record_path, test_record_path
def wrap_bytes(value):
    """Wrap one raw byte string as a bytes-list ``tf.train.Feature``."""
    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)
def wrap_int64(value):
    """Wrap an int, or an iterable of ints, as an int64 ``tf.train.Feature``."""
    # Anything without __iter__ is treated as a scalar and promoted to a
    # one-element list before being handed to Int64List.
    values = value if hasattr(value, '__iter__') else [value]
    int_list = tf.train.Int64List(value=values)
    return tf.train.Feature(int64_list=int_list)
def convert(image_paths, labels, record_path):
    """Read image files and write them, with labels, into a TFRecord file.

    Every image is loaded with ``imread``, passed through
    ``centered_crop(img, 500, 500)`` and stored as raw bytes together with
    its label and its height, width and channel count.

    Args:
        image_paths: Iterable of paths to the image files.
        labels: Iterable parallel to ``image_paths``. An integer label is
            used as a class index and stored as a 17-element one-hot list;
            a ``str`` label is treated as the path of a segmentation image
            and stored as raw bytes.
        record_path: Path of the TFRecord file to write.

    Raises:
        TypeError: If a label is neither an integer nor a ``str``.
    """
    num_classes = 17  # @hardcoded: matches the label length parse() expects
    with tf.python_io.TFRecordWriter(record_path) as writer:
        for path, label in zip(image_paths, labels):
            # Load image data.
            with open(path, 'rb') as f:
                img = imread(f)
            img = centered_crop(img, 500, 500)  # @hardcoded to 500x500 for now
            y, x, channels = img.shape

            # NOTE(review): parse() decodes 'image' as float32. If imread
            # yields uint8 here (presumably the case for JPEG input) the
            # stored bytes would be misinterpreted -- confirm the dtype.
            # tobytes() replaces tostring(), which NumPy 2.0 removed.
            img_bytes = img.tobytes()

            # Decide how the label must be wrapped. NumPy integer scalars
            # are accepted too, since they are not instances of int.
            if isinstance(label, (int, np.integer)):
                one_hot = np.zeros((num_classes,), dtype=int)
                one_hot[label] = 1  # binary label array for softmax
                label_feature = wrap_int64(one_hot.tolist())
            elif isinstance(label, str):
                # Assuming we have segmentations: the label is an image path.
                label_feature = wrap_bytes(imread(label).tobytes())
            else:
                # Previously this fell through and reused (or never bound)
                # the wrapper chosen for an earlier example.
                raise TypeError(
                    'unsupported label type: {}'.format(type(label).__name__)
                )

            # Wrap the data as Features.
            feature = tf.train.Features(feature={
                'image': wrap_bytes(img_bytes),
                'label': label_feature,
                'xdim': wrap_int64(x),
                'ydim': wrap_int64(y),
                'channels': wrap_int64(channels)
            })
            example = tf.train.Example(features=feature)
            # Save the current input into the TFRecord file.
            writer.write(example.SerializeToString())
def convert_from_data(data_getter, record_path):
    """Write in-memory image data and labels into a TFRecord file.

    Each item's data is passed through ``convert_images`` and stored as
    raw bytes together with its label and the height, width and channel
    count of the converted array.

    Args:
        data_getter: Iterable yielding ``(data, label)`` pairs, where
            ``data`` is raw image data accepted by ``convert_images``. An
            integer label is used as a class index and stored as a
            10-element one-hot list; a ``str`` label is treated as the path
            of a segmentation image and stored as raw bytes.
        record_path: Path of the TFRecord file to write.

    Raises:
        TypeError: If a label is neither an integer nor a ``str``.
    """
    num_classes = 10  # @hardcoded Cifar10
    with tf.python_io.TFRecordWriter(record_path) as writer:
        for data, label in data_getter:
            img = convert_images(data)
            # convert_images returns a 4-dim array; the leading image count
            # is not stored in the record.
            _, y, x, channels = img.shape
            # tobytes() replaces tostring(), which NumPy 2.0 removed.
            img_bytes = img.tobytes()

            # Decide how the label must be wrapped. NumPy integer scalars
            # are accepted too, since they are not instances of int.
            if isinstance(label, (int, np.integer)):
                one_hot = np.zeros((num_classes,), dtype=int)
                one_hot[label] = 1  # binary label array for softmax
                label_feature = wrap_int64(one_hot.tolist())
            elif isinstance(label, str):
                # Assuming we have segmentations: the label is an image path.
                label_feature = wrap_bytes(imread(label).tobytes())
            else:
                # Previously this fell through and reused (or never bound)
                # the wrapper chosen for an earlier example.
                raise TypeError(
                    'unsupported label type: {}'.format(type(label).__name__)
                )

            # Wrap the data as Features.
            feature = tf.train.Features(feature={
                'image': wrap_bytes(img_bytes),
                'label': label_feature,
                'xdim': wrap_int64(x),
                'ydim': wrap_int64(y),
                'channels': wrap_int64(channels)
            })
            example = tf.train.Example(features=feature)
            # Save the current input into the TFRecord file.
            writer.write(example.SerializeToString())
def parse(serialized):
    """Decode one serialized example into image, label and size tensors.

    Returns:
        Tuple ``(image, label, xdim, ydim, channels)``. ``image`` is the
        'image' bytes decoded as a flat float32 tensor; the remaining
        entries are the int64 features read from the example.
    """
    # Expected features in the dataset. This may need to change if we end
    # up trying segmentation. The label length is fixed at 17.
    spec = {
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature(17, tf.int64),
    }
    for size_key in ('xdim', 'ydim', 'channels'):
        spec[size_key] = tf.FixedLenFeature(1, tf.int64)

    # Parse the serialized data into a dict of tensors.
    example = tf.parse_single_example(serialized=serialized, features=spec)

    # The image was stored as raw bytes; reinterpret them as float32.
    image = tf.decode_raw(example['image'], tf.float32)

    return (
        image,
        example['label'],
        example['xdim'],
        example['ydim'],
        example['channels'],
    )
def input_fn(filenames, train=True, batch_size=32, buffer_size=512):
    """Build the batched input tensors from TFRecord files.

    Args:
        filenames: Filenames for the TFRecords files.
        train: Boolean whether training (True) or testing (False).
        batch_size: Return batches of this size.
        buffer_size: Size of the shuffle buffer. Random shuffling is done
            on the buffer, so it must be big enough.

    Returns:
        Tuple ``(x, y)`` where ``x`` is ``{'x': images_batch}`` and ``y``
        is the labels batch.
    """
    # Read the records and parse each serialized example into tensors.
    records = tf.data.TFRecordDataset(filenames=filenames).map(parse)

    # Training shuffles and repeats forever; testing makes a single
    # ordered pass over the data.
    if train:
        records = records.shuffle(buffer_size=buffer_size)
    records = records.repeat(None if train else 1)
    records = records.batch(batch_size)

    # Pull the next batch; the dimensionality tensors are not used yet.
    next_batch = records.make_one_shot_iterator().get_next()
    images_batch, labels_batch, _, _, _ = next_batch

    if train:
        images_batch = distort_batch(images_batch)

    # The input function must return a dict wrapping the images.
    return {'x': images_batch}, labels_batch
def train_input_fn():
    """Return the training inputs from ``input_fn`` with its defaults."""
    # @hardcoded: location of the OxfordFlower training record
    record_path = os.path.expanduser('~/OxfordFlower/train.tfrecord')
    return input_fn(record_path)
def test_input_fn():
    """Return the testing inputs from ``input_fn`` with ``train=False``."""
    # @hardcoded: location of the OxfordFlower test record
    record_path = os.path.expanduser('~/OxfordFlower/test.tfrecord')
    return input_fn(record_path, train=False)
# basic helpers
def centered_crop(img, xdim, ydim):
    """Return a centered ``ydim`` x ``xdim`` crop of a 3-dim image array.

    The dataset needs images of one shape; this is the early, simple
    solution. An image smaller than the requested size along either axis
    is first zero-padded at the bottom/right edge up to that size.

    Args:
        img: Array of shape ``(height, width, channels)``.
        xdim: Width of the returned crop.
        ydim: Height of the returned crop.

    Returns:
        Array of shape ``(ydim, xdim, channels)`` with the dtype of ``img``.

    Raises:
        ValueError: If ``img`` does not have exactly three dimensions.
    """
    y, x, _ = img.shape

    # Pad against the requested size rather than a fixed 500, and by as
    # many rows/columns as are missing (some images are Nx499, others may
    # be smaller still).
    pad_y = max(ydim - y, 0)
    pad_x = max(xdim - x, 0)
    if pad_y or pad_x:
        img = np.pad(img, ((0, pad_y), (0, pad_x), (0, 0)), mode='constant')
        y, x, _ = img.shape

    x0 = x // 2 - xdim // 2
    y0 = y // 2 - ydim // 2
    return img[y0:y0 + ydim, x0:x0 + xdim, :]
def convert_images(raw):
    """
    Turn raw CIFAR-10 pixel data into float images.

    The result is a 4-dim float32 array laid out as
    [image_number, height, width, channel]; input values in 0..255 map
    to floats between 0.0 and 1.0.
    """
    # Scale the raw values as float32.
    scaled = np.divide(np.array(raw, dtype=np.float32), 255.0)

    # The raw layout is channel-first: 3 planes of 32x32 per image.
    channel_first = scaled.reshape(-1, 3, 32, 32)

    # Move the channel axis to the end.
    return np.transpose(channel_first, (0, 2, 3, 1))
def distort(img: tf.Tensor) -> tf.Tensor:
    """Apply random hue, contrast, saturation and left-right flip to an image."""
    hue_shifted = tf.image.random_hue(img, max_delta=0.05)
    recontrasted = tf.image.random_contrast(hue_shifted, lower=0.3, upper=1.0)
    resaturated = tf.image.random_saturation(recontrasted, lower=0.0, upper=2.0)
    return tf.image.random_flip_left_right(resaturated)
def distort_batch(image_batch: tf.Tensor, image_shape=(500, 500, 3)) -> tf.Tensor:
    """Apply ``distort`` to every image of a flattened batch.

    Args:
        image_batch: Tensor reshapeable to ``[-1] + image_shape``.
        image_shape: ``(height, width, channels)`` of one image. Defaults
            to the previously hard-coded 500x500x3.

    Returns:
        Tensor of shape ``[-1, height * width * channels]`` holding the
        distorted images, flattened again.
    """
    image_shape = list(image_shape)
    # np.prod replaces np.product, which NumPy 2.0 removed.
    flat_size = int(np.prod(image_shape))

    # The distortions work per image, so restore the image layout, map
    # over the batch, then flatten back.
    image_batch = tf.reshape(image_batch, shape=[-1] + image_shape)
    image_batch = tf.map_fn(distort, image_batch)
    image_batch = tf.reshape(image_batch, shape=[-1, flat_size])
    return image_batch
# Script entry point: build the TFRecord files using the defaults of
# create_tfrecords.
if __name__ == '__main__':
    create_tfrecords()