-
Notifications
You must be signed in to change notification settings - Fork 2
/
data_loader.py
107 lines (84 loc) · 4.1 KB
/
data_loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import numpy as np
import tensorflow as tf
import random
import progressbar
import os.path
import matplotlib.pyplot as plt
# Data loader to provide iterator object to the dataset
class Loader():
    """Iterator provider for the CIFAR-100 dataset stored as TFRecords.

    Expects ``train.tfrecords`` and ``test.tfrecords`` to already exist in
    the directory given by ``path``; construction fails fast if either file
    is missing.
    """

    def __init__(self, batch_size=64, path=None):
        """Record dataset constants and verify the TFRecords files exist.

        Args:
            batch_size: number of examples per batch yielded by the iterator.
            path: directory containing ``train.tfrecords`` and ``test.tfrecords``.

        Raises:
            ValueError: if ``path`` is None.
            FileNotFoundError: if either TFRecords file is missing.
        """
        # Explicit raise instead of `assert` so validation survives `python -O`.
        if path is None:
            raise ValueError("Provide the path to dataset")
        dataset_path = path + "/"  # add forward slash (for redundancy)
        # Order matters: get_dataset() indexes [0]=train, [1]=test.
        self.filenames = [dataset_path + "train.tfrecords",
                          dataset_path + "test.tfrecords"]
        self.batch_size = batch_size
        # CIFAR-100 constants (fine-label mode).
        self.num_training_examples = 50000
        self.num_testing_examples = 10000
        self.num_classes = 100
        # Fail fast if the records have not been generated yet
        # (see create_tf_record for how they are produced).
        missing = [fname for fname in self.filenames
                   if not os.path.isfile(fname)]
        for fname in missing:
            print("TFRecords not found for file: " + fname)
        if missing:
            raise FileNotFoundError(
                "Missing TFRecords file(s): " + ", ".join(missing))

    def get_dataset(self, train=True):
        """Build a one-shot iterator over the train or test TFRecords file.

        Args:
            train: if True iterate the training split, else the test split.

        Returns:
            A tf.data one-shot iterator yielding (image, label) batches.
        """
        filenames = self.filenames[0] if train else self.filenames[1]
        dataset = tf.data.TFRecordDataset(filenames)
        # 10000 is the buffer size, i.e. number of elements prefetched.
        dataset = dataset.apply(
            tf.contrib.data.shuffle_and_repeat(10000, seed=0))
        dataset = dataset.apply(
            tf.contrib.data.map_and_batch(self.parse_example,
                                          batch_size=self.batch_size,
                                          num_parallel_batches=3))
        # NOTE(review): assumes a GPU is present — prefetch target is fixed.
        dataset = dataset.apply(
            tf.contrib.data.prefetch_to_device("/device:GPU:0", 100))
        iterator = dataset.make_one_shot_iterator()
        return iterator

    def parse_example(self, serialized):
        """Decode one serialized tf.train.Example into (image, label).

        Args:
            serialized: a scalar string tensor holding one Example proto.

        Returns:
            Tuple of a [32, 32, 3] float32 image tensor and an int64 label.
        """
        features = {'image': (tf.FixedLenFeature((), tf.string, default_value="")),
                    'label': (tf.FixedLenFeature((), tf.int64, default_value=0))}
        parsed = tf.parse_single_example(serialized=serialized, features=features)
        raw_image = parsed['image']
        # Images were written as raw float32 bytes, so decode as float32.
        image = tf.decode_raw(raw_image, tf.float32)
        return tf.reshape(image, [32, 32, 3]), parsed['label']

    def create_tf_record(self, examples, labels, path):
        """Serialize examples and labels into a .tfrecord file at `path`.

        Args:
            examples: numpy array of images, indexed along axis 0.
            labels: per-example integer labels, aligned with `examples`.
            path: output .tfrecords file path.
        """
        with tf.python_io.TFRecordWriter(path) as writer:
            # Progress bar is cosmetic only.
            print("Writing " + path + "...")
            widgets = [progressbar.Percentage(), progressbar.Bar()]
            bar = progressbar.ProgressBar(widgets=widgets,
                                          max_value=examples.shape[0]).start()
            for i in range(examples.shape[0]):
                # tobytes(): tostring() was removed in NumPy 2.0.
                img = examples[i].tobytes()
                label = labels[i]
                # One Example proto per image: raw bytes + int64 label.
                features = tf.train.Features(
                    feature={
                        'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img])),
                        'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[label]))}
                )
                example = tf.train.Example(features=features)
                serialized = example.SerializeToString()
                writer.write(serialized)
                bar.update(i + 1)
            bar.finish()