-
Notifications
You must be signed in to change notification settings - Fork 2
/
dataset_process.py
270 lines (216 loc) · 13 KB
/
dataset_process.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
#
# NOTE: this script does not free memory manually
#
import os
import pickle
import re
import subprocess
import sys
from math import floor
from os import listdir
from os.path import isfile, join

import cv2
import numpy as np
import scipy.io.wavfile
from imutils import paths

from include.telegram_logger import *
from include.globals_and_functions import *
try:
    # Dataset processing pipeline. For every video in the raw folder that is
    # not yet listed in the dataset config file, the script:
    #   1. extracts (decimated, resized) frames as png files,
    #   2. extracts the audio track and computes the log audio power per frame,
    #   3. stacks all frames of the video into a single numpy array,
    #   4. computes the mean and std of the stacked frames,
    # and finally rewrites the config file with the full list of videos.
    # Any unexpected error is reported on the console and through Telegram.

    # ------------------- Pre-flight checks
    print_info("Checking necessary tools")
    # ffmpeg is invoked further down to extract the audio track, so it must
    # be checked here as well.
    if not is_tool("ffmpeg"):
        print_error("Please install ffmpeg for the audio extraction")
        sys.exit(1)
    if not is_tool("ffprobe"):
        print_error("Please install ffprobe for the audio extraction")
        sys.exit(1)

    # Compatibility switch: older datasets forced the number of extracted
    # frames to be a multiple of 27. Not required anymore; keep it disabled
    # when building a dataset from scratch.
    FORCE_FRAME_COUNT_MULTIPLE_OF_27 = False
    # Audio extraction is the most time-consuming step; it can be switched
    # off here when the audio data does not need to be regenerated.
    EXTRACT_AUDIO = True

    # ------------------- Find out which videos still have to be processed
    dataset_dir = os.path.join(CONST_STR_DATASET_BASE_PATH, CONST_STR_DATASET_DATAPATH)
    raw_dir = os.path.join(dataset_dir, CONST_STR_DATASET_RAW_DATAPATH)
    config_filepath = os.path.join(dataset_dir, CONST_STR_DATASET_CONFIG_FILENAME)

    print_info("Reading filepath off all videos")
    # Names of ALL regular files in the raw folder (every raw video lives there)
    dataset_raw_datapath = [f for f in listdir(raw_dir) if isfile(join(raw_dir, f))]

    # The config file holds the names of the videos that were already
    # processed; only the videos missing from it are "new".
    try:
        with open(config_filepath, "rb") as fp:
            dataset_config_file = pickle.load(fp)
    except (OSError, EOFError, pickle.UnpicklingError):
        # New dataset (or unreadable config file): every raw video is unprocessed
        print_warning("Could not find dataset config file. Creating one")
        dataset_config_file = list(dataset_raw_datapath)
        unprocessed_videos = list(dataset_raw_datapath)
    else:
        already_processed = set(dataset_config_file)
        unprocessed_videos = [item for item in dataset_raw_datapath if item not in already_processed]
        # The new videos are appended now because the config file is only
        # written back at the very end of the script, after processing.
        dataset_config_file += unprocessed_videos

    if not unprocessed_videos:
        print_info("There are no new videos to be processed in the dataset")
        telegramSendMessage('Dataset is already up to date')
        sys.exit(0)

    print_info("The following new videos were added to the dataset")
    for video_name in unprocessed_videos:
        print('\t'+video_name)
    telegramSendMessage(str(len(unprocessed_videos))+" new videos were added to the dataset")

    print_info("Starting dataset processing")
    telegramSendMessage("Starting dataset processing")

    # ------------------- Stage 1: extract frames and sound of each new video
    for video_name in unprocessed_videos:
        # Where the raw video is
        video_raw_datapath = os.path.join(raw_dir, video_name)
        # Where everything extracted from this video is stored
        video_datapath = os.path.join(dataset_dir, video_name.replace(".", ""))

        try:
            os.makedirs(video_datapath, exist_ok=True)
        except OSError:
            print_error("Could not make directory for video '"+str(video_name)+"'")
            telegramSendMessage('Error: Creating directory')
            sys.exit(1)

        # ------------------- Extraction of frames from video
        # Frames are resized with cv2.resize() instead of ffmpeg, which
        # produced undesired compression artifacts on the output frames.
        print_info("Extracting frames from video "+video_name)
        telegramSendMessage("Extracting frames from video "+video_name)

        currentFrame = 0  # Index of the next frame to be written to disk
        videoFrame = 0    # Index of the current frame in the raw video
        # Two counters are used so that the extraction fps can be reduced
        # by decimation (only every N-th raw frame is kept).
        cap = cv2.VideoCapture(video_raw_datapath)
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    # End of stream (or read error): stop right away
                    break
                if videoFrame % CONST_INT_DATASET_DECIMATION_FACTOR == 0:
                    frame_name = os.path.join(video_datapath, str(currentFrame)+'.png')
                    frame = cv2.resize(frame, CONST_VEC_DATASET_OUTPUT_RESOLUTION)
                    # cv2.imwrite reports failure through its return value
                    if not cv2.imwrite(frame_name, frame):
                        raise IOError("Could not write frame '"+frame_name+"'")
                    currentFrame += 1
                videoFrame += 1
        finally:
            # Release the capture even when the extraction fails
            cap.release()

        total_number_of_video_frames = currentFrame
        if total_number_of_video_frames == 0:
            # Without frames the later stages cannot work (the audio power
            # computation would divide by zero), so fail with a clear message.
            raise RuntimeError("No frames could be extracted from video '"+video_name+"'")

        if FORCE_FRAME_COUNT_MULTIPLE_OF_27:
            print_warning("Forcing the number of extracted frames to be multiple of 27. You should disable it if building a dataset from scratch")
            number_of_extra_extracted_frames = total_number_of_video_frames % 27
            # Delete the 'extra' trailing frames
            first_extra = total_number_of_video_frames - number_of_extra_extracted_frames
            for i in range(first_extra, total_number_of_video_frames):
                extra_frame = os.path.join(video_datapath, str(i)+'.png')
                if os.path.exists(extra_frame):
                    os.remove(extra_frame)
            total_number_of_video_frames = first_extra
            print_warning(str(number_of_extra_extracted_frames)+" were deleted. Total number of frames: "+str(total_number_of_video_frames))

        # Save the number of frames of this video inside the video folder
        with open(os.path.join(video_datapath, CONST_STR_DATASET_NMB_OF_FRAMES_FILENAME), "wb") as fp:
            pickle.dump(total_number_of_video_frames, fp)
        print_info(str(total_number_of_video_frames)+" frames were extracted from video "+video_name)

        # ------------------- Extraction of audio from video
        if EXTRACT_AUDIO:
            print_info("Extracting audio information from video "+video_name)
            telegramSendMessage("Extracting audio information from video "+video_name)

            audio_filepath = os.path.join(video_datapath, CONS_STR_DATASET_AUDIOFILE_FILENAME)
            # Argument list (no shell): file names with spaces or shell
            # metacharacters are passed through untouched. "-y" overwrites a
            # stale audio file left by an interrupted run instead of making
            # ffmpeg wait for an interactive answer; check=True turns an
            # ffmpeg failure into an exception.
            subprocess.run(["ffmpeg", "-y", "-i", video_raw_datapath, audio_filepath], check=True)

            print_info("Reading audio file ...")
            _sample_rate, samples = scipy.io.wavfile.read(audio_filepath)

            # ------------------- Calculate the total power for each frame
            # Number of audio samples that belong to one extracted frame
            samples_per_frame = samples.shape[0] // total_number_of_video_frames
            # One row per frame, two columns (the previous implementation
            # indexed them as left/right channel)
            frame_power = np.zeros((total_number_of_video_frames, 2))
            print_info("Calculating audio power for each frame ...")
            telegramSendMessage("Calculating audio power for each frame ...")

            # Square in int64 (avoids overflowing narrow integer samples),
            # then divide every sample by the window length
            samples = np.square(samples, dtype='int64')
            samples = np.divide(samples, samples_per_frame)
            # Sum each frame's window of samples. This vectorised form
            # replaced a per-sample Python loop that was very slow.
            for i in range(total_number_of_video_frames):
                window = samples[i*samples_per_frame:(i+1)*samples_per_frame]
                frame_power[i] = np.sum(window, axis=0)

            # Clip zeros to a small positive value so the log is defined
            frame_power = np.clip(frame_power, 1e-12, None)
            frame_power = np.log(frame_power)

            # Save the per-frame audio power inside the video folder
            np.save(os.path.join(video_datapath, CONS_STR_DATASET_AUDIODATA_FILENAME), frame_power)

    # ------------------- Stage 2: dataset_build (stack the frames of each video)
    for video_name in unprocessed_videos:
        print_info("Stacking images for video "+video_name)
        telegramSendMessage("Stacking images for video "+video_name)

        video_datapath = os.path.join(dataset_dir, video_name.replace(".", ""))
        stacked_frames_filepath = os.path.join(video_datapath, CONS_STR_DATASET_STACKED_FRAMES_FILENAME)

        # Grab all image paths and order them by frame number. Only the file
        # name is inspected so digits in the directory names play no role.
        frame_datapaths = list(paths.list_images(video_datapath))
        frame_datapaths.sort(key=lambda f: int(re.sub(r'\D', '', os.path.basename(f))))

        # One flattened row of pixel values per frame
        frame_rows = []
        for frame_path in frame_datapaths:
            frame = cv2.imread(frame_path)
            if frame is None:
                # cv2.imread signals an unreadable image by returning None
                raise IOError("Could not read frame '"+frame_path+"'")
            frame_rows.append(frame.reshape(-1).astype(np.uint8, copy=False))
        if not frame_rows:
            raise RuntimeError("No frames found for video '"+video_name+"'")

        # Stack once at the end: the result is always 2-D (frames x pixels),
        # also for a single-frame video, and the array is not re-copied for
        # every frame.
        stacked_frames_array = np.stack(frame_rows)

        print_info("Saving stacked frames data to "+stacked_frames_filepath)
        np.save(stacked_frames_filepath, stacked_frames_array)

    # ------------------- Stage 3: mean and std of each video, saved to disk
    for video_name in unprocessed_videos:
        print_info("Loading numpy dataset for mean and std calculation")
        video_datapath = os.path.join(dataset_dir, video_name.replace(".", ""))
        video_data = np.load(os.path.join(video_datapath, CONS_STR_DATASET_STACKED_FRAMES_FILENAME))
        # Restore the image shape of every stacked (flattened) frame
        video_data = np.reshape(video_data, (video_data.shape[0],)+CONST_VEC_DATASET_OUTPUT_IMAGE_SHAPE)

        # Statistics are reduced over the first three axes
        mean = np.mean(video_data, axis=(0, 1, 2)).astype(float)
        std = np.std(video_data, axis=(0, 1, 2)).astype(float)
        statistics = [mean, std]
        print("mean: "+'\t'+str(statistics[0]))
        print("std: " +'\t'+str(statistics[1]))

        with open(os.path.join(video_datapath, CONS_STR_DATASET_STATISTICS_FILENAME), "wb") as fp:
            pickle.dump(statistics, fp)

    # Only now that everything succeeded is the list of processed videos saved
    with open(config_filepath, "wb") as fp:
        pickle.dump(dataset_config_file, fp)

    print_info("Script ended successfully")
    telegramSendMessage("Script ended successfully")
except Exception as e:
    # Top-level boundary: report the failure locally and through Telegram,
    # then exit with a non-zero status so callers can detect the failure.
    print_error('An error has occurred')
    print_error(str(e))
    telegramSendMessage('[ERROR]: An error has occurred')
    telegramSendMessage(str(e))
    sys.exit(1)