-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_proc.py
116 lines (85 loc) · 3.7 KB
/
data_proc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, Subset
from pathlib import Path
from PIL import Image
import os
import numpy as np
import torch, dill
from crop import crop_image
# Root directory of the image data, relative to the current working directory.
# Image_dataset looks for sub-folders named with zero-padded three-digit
# sequence numbers (e.g. "000", "001") directly beneath it.
DATA_PATH = Path('data_set')
class Image_dataset(Dataset):
    """In-memory image dataset that serves randomly bordered crops.

    Every file in the selected numbered sub-folders of ``DATA_PATH`` is
    opened, passed through ``transforms.Resize(90)`` followed by a 90x90
    centre crop, and kept as a ``uint8`` NumPy array.  Each item access
    draws fresh random border widths and delegates to ``crop_image``.
    """

    def __init__(self, start_seq, end_seq):
        """Load all images from folders ``start_seq`` .. ``end_seq - 1``.

        Args:
            start_seq: First folder number (inclusive).
            end_seq: Last folder number (exclusive).

        Folder names are the number zero-padded to three digits.
        """
        super().__init__()
        self.img_arrays = []
        # It makes sense to only work with inputs of sizes that can appear in the test set.
        im_shape = 90
        resize_transforms = transforms.Compose([
            transforms.Resize(size=im_shape),
            transforms.CenterCrop(size=(im_shape, im_shape)),
        ])
        for seq in range(start_seq, end_seq):
            folder = os.path.join(DATA_PATH, '{:0>3}'.format(seq))
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # index -> image mapping (and the idx label returned by
            # __getitem__) reproducible across runs and machines.
            for image_file in sorted(os.listdir(folder)):
                with Image.open(Path(folder, image_file)) as im:
                    resized = resize_transforms(im)
                    self.img_arrays.append(np.asarray(resized, dtype=np.uint8))

    def random_within_range(self):
        """Draw a random pair of border widths.

        Returns:
            ``(start, end)`` where ``start`` is drawn from ``[5, 10)`` and
            ``end`` from ``[5, 15 - start)``, so both are at least 5 and
            their sum never exceeds 14.
        """
        start = np.random.randint(5, 10)
        # The upper bound shrinks with ``start`` to cap the combined border.
        new_max = 15 - start
        end = np.random.randint(5, new_max)
        return start, end

    def __len__(self):
        """Return the number of images held in memory."""
        return len(self.img_arrays)

    def __getitem__(self, idx):
        """Return a freshly cropped sample for image ``idx``.

        Independent random borders are drawn for the two axes on every
        call, so repeated access to the same index yields different crops.

        Returns:
            The three values produced by ``crop_image`` followed by ``idx``.
        """
        img_array = self.img_arrays[idx]
        border_x = self.random_within_range()
        border_y = self.random_within_range()
        img_arrays, known_arrs, target_arrs = crop_image(img_array, border_x, border_y)
        return img_arrays, known_arrs, target_arrs, idx
def stack_images_arrays(batch_as_list):
    """Collate dataset samples into batched tensors.

    Args:
        batch_as_list: List of samples as returned by the datasets'
            ``__getitem__``: ``(img_array, known_array, target_array, idx)``.
            The image and known arrays must fit a 90x90 slot; the target
            must be one-dimensional with at most 2475 entries.

    Returns:
        A tuple ``(stacked_imgs_knowns, stacked_targets, stacked_labels)``:

        * ``stacked_imgs_knowns`` -- float tensor ``(n, 2, 90, 90)``;
          channel 0 is the per-sample standardised image, channel 1 the
          known array.
        * ``stacked_targets`` -- float tensor ``(n, 2475)``; each row holds
          the sample's target values followed by zero padding.
        * ``stacked_labels`` -- integer tensor ``(n,)`` with the sample
          indices.
    """
    side = 90
    # Longest target vector a sample can produce.
    # NOTE(review): presumably 90*90 - 75*75 (total border of 15 on each
    # axis) -- confirm against crop_image.
    max_target_len = 2475

    n_samples = len(batch_as_list)
    stacked_imgs_knowns = torch.zeros(size=(n_samples, 2, side, side))
    stacked_targets = torch.zeros(size=(n_samples, max_target_len))

    for i, sample in enumerate(batch_as_list):
        # Channel 0: the image, standardised to zero mean / unit variance.
        image = stacked_imgs_knowns[i, 0]  # view into the batch tensor
        image[...] = torch.as_tensor(sample[0])
        mean = image.mean()
        std = image.std()
        image -= mean
        # A constant image has zero spread; dividing would fill the channel
        # with NaNs, so it is left centred at zero instead.
        if std > 0:
            image /= std

        # Channel 1: the known array, copied as-is.
        stacked_imgs_knowns[i, 1] = torch.as_tensor(sample[1])

        # Targets differ in length between samples; the tail stays zero.
        target = sample[2]
        stacked_targets[i, :int(target.shape[0])] = torch.as_tensor(target)

    # Labels are dataset indices, which can exceed 255, so they need a wide
    # integer type rather than uint8.
    stacked_labels = torch.stack(
        [torch.tensor(sample[3], dtype=torch.long) for sample in batch_as_list],
        dim=0,
    )
    return stacked_imgs_knowns, stacked_targets, stacked_labels
class Test_set(Dataset):
    """Dataset wrapper around a pre-built test set with fixed borders.

    Unlike a randomly cropped dataset, every sample carries its own
    predefined border widths, which are passed to ``crop_image`` unchanged.
    """

    def __init__(self, testset):
        """Copy images and borders out of ``testset``.

        Args:
            testset: Mapping with the keys ``"input_arrays"``,
                ``"borders_x"`` and ``"borders_y"``; each value is an
                indexable sequence with one entry per sample.
        """
        super().__init__()
        self.img_arrays = []
        self.borders_x = []
        self.borders_y = []
        # Take the sample count from the data instead of a fixed number, so
        # smaller sets do not raise and larger sets are not truncated.
        n_samples = len(testset["input_arrays"])
        for idx in range(n_samples):
            self.img_arrays.append(testset["input_arrays"][idx])
            self.borders_x.append(testset['borders_x'][idx])
            self.borders_y.append(testset['borders_y'][idx])

    def __len__(self):
        """Return the number of test samples."""
        return len(self.img_arrays)

    def __getitem__(self, idx):
        """Return the cropped sample ``idx``.

        Returns:
            The three values produced by ``crop_image`` for the stored image
            and its stored borders, followed by ``idx``.
        """
        img_arr = self.img_arrays[idx]
        b_x = self.borders_x[idx]
        b_y = self.borders_y[idx]
        img_array, known_array, target_array = crop_image(img_arr, b_x, b_y)
        return img_array, known_array, target_array, idx