-
Notifications
You must be signed in to change notification settings - Fork 0
/
feature_extraction.py
218 lines (187 loc) · 6.88 KB
/
feature_extraction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
#!/usr/bin/env python
# coding: utf-8
# In[5]:
"""
Getting Started
This module is used to extract the feature vector of an image.
After extracting the data from the images, it saves the result in the extracted folder
in the current working directory.
It takes an image of NxN size and extracts SIFT features using OpenCV.
Our sample contains 128x128 binary images, and 10 (or N) features are extracted from each image.
The class keeps at most the top 10 SIFT features of an image (ranked by keypoint size)
and eliminates the rest.
Class Structure
Extraction
-- self.max_feture = 10
-- __init__
-- extract
-- __save
-- __sift
>>> pip install opencv-python
>>> pip install opencv-contrib-python
"""
import os
import cv2
# In[9]:
class Extraction:
    """
    Feature extraction module: keeps the top ``max_feture`` SIFT keypoints
    (ranked by keypoint size) of every sample image and dumps them to disk.

    Every image becomes a list of ``max_feture`` rows of the form
    ``[pt[0], pt[1], size, angle, response, octave]``, ordered by ascending
    ``size`` and left-padded with all-zero rows when the image yields fewer
    keypoints than ``max_feture``.
    """

    def __init__(self, sample_folder='English Font Image', max_feture=10, bundle_size=127,
                 num_classes=62, images_per_class=1016, output_folder='extracted'):
        """
        __init__(self, sample_folder='English Font Image', max_feture=10, bundle_size=127,
                 num_classes=62, images_per_class=1016, output_folder='extracted'):
            sample_folder    *string*: Base folder name for input images.
            max_feture       *int*: Number of feature rows kept per image.
            bundle_size      *int*: Number of image indices (each covering every class)
                                    written to one output file pair.
            num_classes      *int*: Number of ``SampleNNN`` class folders to read.
            images_per_class *int*: Number of images read from every class folder.
            output_folder    *string*: Folder that receives the data/lable files.

        @raise ValueError: if max_feture is negative or bundle_size is below 1.
        """
        if max_feture < 0:
            raise ValueError("max_feture must be >= 0, got %r" % (max_feture,))
        if bundle_size < 1:
            raise ValueError("bundle_size must be >= 1, got %r" % (bundle_size,))
        self.max_feture = max_feture
        self.bundle_size = bundle_size
        self.sample_folder = sample_folder
        self.num_classes = num_classes
        self.images_per_class = images_per_class
        self.output_folder = output_folder
        # The SIFT detector is created lazily on first use and then reused.
        self._detector = None

    def extract(self):
        """
        extract(): Extract the features of every image below ``sample_folder``
        and write them, bundle by bundle, into ``output_folder``.
        @return: None

        Input file structure (folder and image numbers are zero padded)
            <sample_folder>
            +-- Sample001
            +----- img001-00001.png
            +----- img001-00002.png
            +----- ...
            +-- Sample002
            +----- img002-00001.png
            +----- img002-00002.png
            +----- ...
            +-- ...

        Images are visited index by index: image 1 of every class, then
        image 2 of every class, and so on. After every ``bundle_size`` image
        indices the collected samples are written out and the buffers are
        cleared. Samples left over after the last full bundle are written
        as one final, smaller bundle instead of being discarded.

        Output file structure (contents are ``str()`` of nested python lists)
            <output_folder>
            +-- data_1
            +-- lable_1
            +-- data_2
            +-- lable_2
            +-- ...

        Each ``data_N`` entry is a list of
        *[point.pt[0], point.pt[1], point.size, point.angle, point.response, point.octave]*
        rows; each ``lable_N`` entry is the matching one-hot class vector.
        """
        charecter = []
        lable = []
        bundles_written = 0
        for image_index in range(1, self.images_per_class + 1):
            for class_index in range(1, self.num_classes + 1):
                image = self._image_path(class_index, image_index)
                rows = self._keypoint_rows(self.__sift(image))
                charecter.append(self._fixed_size(rows, self.max_feture))
                lable.append(self._one_hot(class_index - 1, self.num_classes))
            if image_index % self.bundle_size == 0:
                bundles_written = image_index // self.bundle_size
                self.__save(str(charecter), str(lable), str(bundles_written))
                # Start fresh buffers so memory stays bounded by one bundle.
                charecter = []
                lable = []
        if charecter:
            # images_per_class was not a multiple of bundle_size: flush the rest.
            self.__save(str(charecter), str(lable), str(bundles_written + 1))

    def _image_path(self, class_index, image_index):
        """Return the path of image ``image_index`` of class ``class_index``."""
        folder = str(class_index).zfill(3)
        file = str(image_index).zfill(5)
        # os.path.join keeps the path valid on every platform, not only Windows.
        return os.path.join(self.sample_folder, "Sample" + folder,
                            "img" + folder + "-" + file + ".png")

    @staticmethod
    def _keypoint_rows(keypoints):
        """Convert keypoint objects into plain six-value rows."""
        return [[point.pt[0], point.pt[1], point.size,
                 point.angle, point.response, point.octave]
                for point in keypoints]

    @staticmethod
    def _fixed_size(rows, limit):
        """Return exactly ``limit`` rows: the largest-``size`` rows, zero padded."""
        padded = list(rows)
        # Downstream consumers need the same number of rows for every image.
        padded.extend([0, 0, 0, 0, 0, 0] for _ in range(len(padded), limit))
        # Stable ascending sort on ``size`` (column 2); the tail holds the largest.
        padded.sort(key=lambda row: row[2])
        return padded[len(padded) - limit:]

    @staticmethod
    def _one_hot(position, size):
        """Return a list of ``size`` zeros with a single 1 at ``position``."""
        vector = [0] * size
        vector[position] = 1
        return vector

    def __save(self, text, lable, lable_counter):
        """
        private function
        __save(text, lable, lable_counter):
            text          *string*: Raw feature data in python list (eval) format.
            lable         *string*: Raw lable data in python list (eval) format.
            lable_counter *string*: Bundle number used as the file name suffix.
        Writes ``data_<lable_counter>`` and ``lable_<lable_counter>`` into
        ``output_folder``, creating the folder when it does not exist yet.
        @return : None
        """
        suffix = str(lable_counter)
        os.makedirs(self.output_folder, exist_ok=True)
        with open(os.path.join(self.output_folder, "data_" + suffix), 'w') as handle:
            handle.write(text)
        with open(os.path.join(self.output_folder, "lable_" + suffix), 'w') as handle:
            handle.write(lable)

    @staticmethod
    def _create_sift():
        """Create a SIFT detector from whichever OpenCV module provides it."""
        if hasattr(cv2, "SIFT_create"):
            return cv2.SIFT_create()
        # Older builds only ship SIFT in the contrib package.
        return cv2.xfeatures2d.SIFT_create()

    def __sift(self, path):
        """
        private function
        __sift(path):
            path *string*: path of the image that you want to extract.
        @return : SIFT keypoints of the image
        @raise OSError: if the image cannot be read.
        """
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            raise OSError("Could not read image: %s" % path)
        if getattr(self, "_detector", None) is None:
            self._detector = self._create_sift()
        # Only keypoints are used, so descriptors are not computed.
        return self._detector.detect(img, None)
# In[3]:
if __name__ == '__main__':
    # Create an instance of the extraction module with its default settings.
    e = Extraction()
    # Extract the features and write them to disk.
    e.extract()
    # Clean up the variable. This stays inside the guard because ``e`` only
    # exists when the file runs as a script; a module-level ``del e`` would
    # raise NameError on import.
    del e