-
Notifications
You must be signed in to change notification settings - Fork 35
/
Copy pathframe.py
310 lines (217 loc) · 9.88 KB
/
frame.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
"""
https://github.com/FrederikSchorr/sign-language
Extract frames from a video (or many videos).
Plus some frame=image manipulation utilities.
"""
import os
import glob
import warnings
import random
from subprocess import check_output
import numpy as np
import pandas as pd
import cv2
def image_resize_aspectratio(arImage: np.array, nMinDim:int = 256) -> np.array:
    """ Resize one image so that its smaller side equals nMinDim

    Height and width are scaled by the same factor (bilinear interpolation),
    so the aspect ratio is preserved. The input is returned unchanged
    when the scale factor is exactly 1.0.
    """
    nRows, nCols, _ = arImage.shape

    # the smaller of height/width is mapped onto nMinDim
    fScale = nMinDim / min(nRows, nCols)

    if fScale == 1.0:
        # already at target size, nothing to do
        return arImage

    return cv2.resize(arImage, dsize = (0,0), fx = fScale, fy = fScale, interpolation=cv2.INTER_LINEAR)
def images_resize_aspectratio(arImages: np.array, nMinDim:int = 256) -> np.array:
    """ Resize every image of a 4-dimensional array, preserving aspect ratio

    Input: arImages = (number of images, height, width, depth)
    Each image is passed to image_resize_aspectratio together with nMinDim.
    Returns a new array built from the resized images.
    """
    nImages, _, _, _ = arImages.shape

    # nMinDim must be forwarded, otherwise the helper always uses its own default
    liImages = [image_resize_aspectratio(arImages[i, ...], nMinDim) for i in range(nImages)]

    return np.array(liImages)
def video2frames(sVideoPath:str, nResizeMinDim:int = None) -> np.array:
    """ Read video file with OpenCV and return array of frames

    The frame rate depends on the video (and cannot be set)

    if nResizeMinDim is not None: Frames are resized preserving aspect ratio
        (via image_resize_aspectratio) so that the smallest dimension is
        eg 256 pixels
    if nResizeMinDim is None: Frames are returned in their original size

    Raises ValueError if the video file cannot be opened.
    """
    # Create a VideoCapture object and read from input file
    oVideo = cv2.VideoCapture(sVideoPath)
    if not oVideo.isOpened():
        oVideo.release()
        raise ValueError("Error opening video file")

    liFrames = []
    try:
        # Read until video is completed
        while True:
            bGrabbed, arFrame = oVideo.read()
            if not bGrabbed:
                break

            if nResizeMinDim is not None:
                # resize image
                arFrame = image_resize_aspectratio(arFrame, nResizeMinDim)

            # Save the (possibly resized) frame to list
            liFrames.append(arFrame)
    finally:
        # always free the capture handle, also when resizing fails
        oVideo.release()

    return np.array(liFrames)
def frames2files(arFrames:np.array, sTargetDir:str):
    """ Save each frame of an array as a numbered jpg file

    Input: arFrames = (number of frames, height, width, depth)
    Files are named frame0000.jpg, frame0001.jpg, ... inside sTargetDir,
    which is created if it does not exist yet.
    """
    os.makedirs(sTargetDir, exist_ok=True)

    nTotal = arFrames.shape[0]
    for nIndex in range(nTotal):
        sFile = sTargetDir + "/frame%04d.jpg" % nIndex
        cv2.imwrite(sFile, arFrames[nIndex, :, :, :])

    return
def files2frames(sPath:str) -> np.array:
    """ Load all jpg files of a directory into one array of frames

    The files are read in sorted filename order.
    Raises ValueError if the directory contains no .jpg file.
    """
    # sorting upfront keeps the frames in filename order
    liPaths = glob.glob(sPath + "/*.jpg")
    liPaths.sort()

    if not liPaths:
        raise ValueError("No frames found in " + sPath)

    # read frame by frame
    return np.array([cv2.imread(sFile) for sFile in liPaths])
def frames_downsample(arFrames:np.array, nFramesTarget:int) -> np.array:
    """ Resample an array of frames to exactly nFramesTarget frames

    Input: arFrames = (number of frames, height, width, depth)
    Frames are picked at evenly spaced positions; if there are fewer frames
    than requested, frames get repeated. The input array itself is returned
    when it already has nFramesTarget frames.
    """
    nAvailable, _, _, _ = arFrames.shape
    if nAvailable == nFramesTarget:
        return arFrames

    # distance (in source frames) between two consecutive target frames
    fStep = nAvailable / nFramesTarget

    liPicked = []
    for nTarget in range(nFramesTarget):
        liPicked.append(arFrames[int(fStep * nTarget), :, :, :])

    return np.array(liPicked)
def image_crop(arFrame, nHeightTarget, nWidthTarget) -> np.array:
    """ Cut the centered nHeightTarget x nWidthTarget region out of 1 frame

    Raises ValueError if the frame is smaller than the target in either dimension.
    """
    nRows, nCols, _ = arFrame.shape

    if nRows < nHeightTarget or nCols < nWidthTarget:
        raise ValueError("Image height/width too small to crop to target size")

    # upper left corner of the centered window
    nTop = int(nRows/2 - nHeightTarget/2)
    nLeft = int(nCols/2 - nWidthTarget/2)

    slRows = slice(nTop, nTop + nHeightTarget)
    slCols = slice(nLeft, nLeft + nWidthTarget)
    return arFrame[slRows, slCols, :]
def images_crop(arFrames:np.array, nHeightTarget, nWidthTarget) -> np.array:
    """ Cut the centered nHeightTarget x nWidthTarget region out of every frame

    Input: arFrames = (number of frames, height, width, depth)
    Raises ValueError if the frames are smaller than the target in either dimension.
    """
    _, nRows, nCols, _ = arFrames.shape

    if nRows < nHeightTarget or nCols < nWidthTarget:
        raise ValueError("Image height/width too small to crop to target size")

    # upper left corner of the centered window
    nTop = int(nRows/2 - nHeightTarget/2)
    nLeft = int(nCols/2 - nWidthTarget/2)

    slRows = slice(nTop, nTop + nHeightTarget)
    slCols = slice(nLeft, nLeft + nWidthTarget)
    return arFrames[:, slRows, slCols, :]
def images_rescale(arFrames:np.array) -> np.ndarray:
    """ Rescale array of images (rgb 0-255) to [-1.0, 1.0]

    Maps 0 -> -1.0, 127.5 -> 0.0 and 255 -> 1.0.
    Accepts any array-like input; the input is not modified,
    a new floating point array is returned.
    """
    # asarray lets plain (nested) lists through as well and is a no-op for arrays
    return np.asarray(arFrames) / 127.5 - 1.0
def images_normalize(arFrames:np.array, nFrames:int, nHeight:int, nWidth:int, bRescale:bool = True) -> np.ndarray:
    """ Several image normalizations/preprocessing:
    - resample the number of frames to nFrames (frames_downsample)
    - crop to centered image of nHeight x nWidth (images_crop)
    - rescale rgb 0-255 value to [-1.0, 1.0] (images_rescale) - only if bRescale == True

    If bRescale is False the pixel values are left untouched; a warning is
    issued when their absolute maximum exceeds 1.0.

    Returns the processed array of frames.
    """
    # normalize the number of frames (assuming typically downsampling)
    arFrames = frames_downsample(arFrames, nFrames)

    # crop to centered image
    arFrames = images_crop(arFrames, nHeight, nWidth)

    if bRescale:
        # normalize to [-1.0, 1.0]
        arFrames = images_rescale(arFrames)
    elif np.max(np.abs(arFrames)) > 1.0:
        # caller asked for no rescaling, but the data does not look rescaled
        warnings.warn("Images not normalized")

    return arFrames
def frames_show(arFrames:np.array, nWaitMilliSec:int = 100):
    """ Display the frames one after another in an OpenCV window named "Frame"

    Input: arFrames = (number of frames, height, width, depth)
    After each frame cv2.waitKey is called with nWaitMilliSec.
    """
    # unpacking also verifies that the array has 4 dimensions
    _, _, _, _ = arFrames.shape

    for arFrame in arFrames:
        cv2.imshow("Frame", arFrame)
        cv2.waitKey(nWaitMilliSec)

    return
def video_length(sVideoPath:str) -> float:
    """ Return the video duration as reported by the mediainfo command line tool

    The Duration value printed by mediainfo is parsed as an integer and
    divided by 1000 (presumably milliseconds -> seconds).
    Requires the external program "mediainfo" to be available.
    """
    liCommand = ["mediainfo", '--Inform=Video;%Duration%', sVideoPath]
    nDuration = int(check_output(liCommand))
    return nDuration / 1000.0
def videosDir2framesDir(sVideoDir:str, sFrameDir:str, nFramesNorm:int = None,
    nResizeMinDim:int = None, tuCropShape:tuple = None, nClasses:int = None):
    """ Extract frames from videos

    Input video structure:
    ... sVideoDir / train / class001 / videoname.avi

    Output:
    ... sFrameDir / train / class001 / videoname / frames.jpg

    Optional arguments:
    nFramesNorm   - resample every video to this number of frames; a target
                    directory that already holds exactly this many files is skipped
    nResizeMinDim - passed on to video2frames
    tuCropShape   - unpacked into images_crop, ie (height, width) of the centered crop
    nClasses      - only use videos of the first nClasses class folders (sorted by name)

    Raises ValueError if no videos are found or a video path has fewer than 4 components.
    """
    # get videos. Assume sVideoDir / train / class / video.mp4
    dfVideos = pd.DataFrame(sorted(glob.glob(sVideoDir + "/*/*/*.*")), columns=["sVideoPath"])
    print("Located {} videos in {}, extracting to {} ...".format(len(dfVideos), sVideoDir, sFrameDir))
    if len(dfVideos) == 0: raise ValueError("No videos found")

    # optionally restrict to the first nClasses classes
    if nClasses is not None:
        dfVideos.loc[:,"sLabel"] = dfVideos.sVideoPath.apply(lambda s: s.split("/")[-2])
        liClasses = sorted(dfVideos.sLabel.unique())[:nClasses]
        dfVideos = dfVideos[dfVideos["sLabel"].isin(liClasses)]
        print("Using only {} videos from first {} classes".format(len(dfVideos), nClasses))

    # loop through all videos and extract frames
    for nCounter, sVideoPath in enumerate(dfVideos.sVideoPath):

        # assemble target directory (assumed directories see above)
        li_sVideoPath = sVideoPath.split("/")
        if len(li_sVideoPath) < 4: raise ValueError("Video path should have min 4 components: {}".format(str(li_sVideoPath)))
        sVideoName = li_sVideoPath[-1].split(".")[0]
        sTargetDir = sFrameDir + "/" + li_sVideoPath[-3] + "/" + li_sVideoPath[-2] + "/" + sVideoName

        # check if frames already extracted
        if nFramesNorm is not None and os.path.exists(sTargetDir):
            nFrames = len(glob.glob(sTargetDir + "/*.*"))
            if nFrames == nFramesNorm:
                print("Video %5d already extracted to %s" % (nCounter, sTargetDir))
                continue
            print("Video %5d: Directory with %d instead of %d frames detected" % (nCounter, nFrames, nFramesNorm))

        # create target directory
        os.makedirs(sTargetDir, exist_ok = True)

        # slice videos into frames with OpenCV
        arFrames = video2frames(sVideoPath, nResizeMinDim)

        # length and fps (a reported duration of 0 must not abort the whole run)
        fVideoSec = video_length(sVideoPath)
        nFrames = len(arFrames)
        fFPS = nFrames / fVideoSec if fVideoSec > 0 else float("nan")

        # downsample
        if nFramesNorm is not None:
            arFrames = frames_downsample(arFrames, nFramesNorm)

        # crop images
        if tuCropShape is not None:
            arFrames = images_crop(arFrames, *tuCropShape)

        # remove frames left over from an earlier extraction: if that run wrote
        # more frames than this one, the surplus files would otherwise survive
        # and get mixed with the new frames
        for sOldFrame in glob.glob(sTargetDir + "/frame*.jpg"):
            os.remove(sOldFrame)

        # write frames to .jpg files
        frames2files(arFrames, sTargetDir)

        print("Video %5d | %5.1f sec | %d frames | %4.1f fps | saved %s in %s" % (nCounter, fVideoSec, nFrames, fFPS, str(arFrames.shape), sTargetDir))

    return
def unittest(sVideoDir, nSamples = 100):
    """ Print duration, shape and fps of randomly chosen videos plus their averages

    Videos (*.mp4, *.avi) are searched one folder level below sVideoDir and
    nSamples of them are drawn at random (with repetition).

    Raises ValueError if nSamples < 1 or if no videos are found.
    """
    # without at least one sample there is nothing to average
    if nSamples < 1: raise ValueError("nSamples must be at least 1")

    print("\nAnalyze video durations and fps from %s ..." % (sVideoDir))
    print(os.getcwd())

    liVideos = glob.glob(sVideoDir + "/*/*.mp4") + glob.glob(sVideoDir + "/*/*.avi")
    if len(liVideos) == 0: raise ValueError("No videos detected")

    fVideoSec_sum, nFrames_sum = 0, 0
    for i in range(nSamples):
        sVideoPath = random.choice(liVideos)

        # read video
        arFrames = video2frames(sVideoPath, 256)
        nFrames = len(arFrames)

        # determine length of video in sec and deduce frame rate
        fVideoSec = video_length(sVideoPath)
        fFPS = nFrames / fVideoSec if fVideoSec > 0 else float("nan")

        fVideoSec_sum += fVideoSec
        nFrames_sum += nFrames

        print("%2d: Shape %s, duration %.1f sec, fps %.1f" % (i, str(arFrames.shape), fVideoSec, fFPS))

    # overall frame rate = all frames / all seconds
    fFPS_avg = nFrames_sum / fVideoSec_sum if fVideoSec_sum > 0 else float("nan")
    print("%d samples: Average video duration %.1f sec, fps %.1f" % (nSamples, fVideoSec_sum / nSamples, fFPS_avg))

    return
if __name__ == '__main__':
    # when run as a script: analyze 100 random videos from each hard-coded folder
    for sTrainDir in ("data-set/01-ledasila/021/train", "data-set/04-chalearn/010/train"):
        unittest(sTrainDir, 100)