import decord as de from matplotlib import pyplot as plt # using cpu in this example ctx = de.cpu(0) # example video video = ‘Javelin_standing_throw_drill.mkv‘ vr = de.VideoReader(video) # using default resolution print(‘Video frames #:‘, len(vr)) # 视频帧数 print(‘First frame shape:‘, vr[0].shape) # 每帧的shape
Video frames #: 48 First frame shape: (240, 320, 3)
控制帧的尺寸:
vr = de.VideoReader(video, width=120, height=240)
print(‘Frame shape:‘, vr[0].shape)
Frame shape: (240, 120, 3)
随机访问显然很慢,但decord使用内部优化来确保不会在这里浪费太多精力。
返回的帧是DLPack兼容的NDArray格式(例如在TVM中使用),可转为numpy数组。
decord中有一个桥接系统,它自动将所有输出转换为与深度学习框架兼容的阵列,例如MXNet、PyTorch、Tensorflow。但始终可以利用numpy数组。
frame10 = vr[10].asnumpy()
plt.imshow(frame10)
plt.show()
很容易一起获得许多帧:
frames = vr.get_batch(range(0, len(vr) - 1, 5)) print(frames.shape)
(10, 240, 120, 3)
import sys, os import decord as de # using cpu in this example ctx = de.cpu(0) # using batchsize = 2 and smaller resolution in this example shape = (2, 480, 640, 3) # using kinetics example videos videos = [‘Javelin_standing_throw_drill.mkv‘, ‘flipping_a_pancake.mkv‘] # using in-batch frame interval 5 interval = 5 # 一个batch中每两帧的距离 # using inter-batch frame interval 20, which means batch-batch interval is 20 skip = 3 # 不同batch之间的距离 # first see how sequential read looks like vl = de.VideoLoader(videos, ctx=ctx, shape=shape, interval=interval, skip=skip, shuffle=0) print(‘num batches:‘, len(vl))
num batches: 9
可视化:
def disp_batches(video_loader, max_disp=5): %matplotlib inline from matplotlib import pyplot as plt import matplotlib.gridspec as gridspec cnt = 0 vl.reset() for batch in vl: if cnt >= max_disp: break print(‘batch data shape:‘, batch[0].shape) print(‘indices:‘, ‘, ‘.join([‘(file: {} frame: {})‘.format(x, y) for x, y in batch[1].asnumpy()])) print(‘----------‘) data = batch[0].asnumpy() columns = 4 rows = max(1, (data.shape[0] + 1) // columns) fig = plt.figure(figsize = (32,(16 // columns) * rows)) gs = gridspec.GridSpec(rows, columns) for i in range(data.shape[0]): plt.subplot(gs[i]) plt.axis("off") plt.imshow(data[i]) cnt += 1 disp_batches(vl, 5)
batch data shape: (2, 480, 640, 3) indices: (file: 0 frame: 0), (file: 0 frame: 7) # 0-7共6帧;间隔为3,下一次从11开始 ---------- batch data shape: (2, 480, 640, 3) indices: (file: 0 frame: 11), (file: 0 frame: 18) # 11-18共6帧;间隔为3,下一次从22开始... ---------- batch data shape: (2, 480, 640, 3) indices: (file: 0 frame: 22), (file: 0 frame: 29) ---------- batch data shape: (2, 480, 640, 3) indices: (file: 1 frame: 0), (file: 1 frame: 7) ---------- batch data shape: (2, 480, 640, 3) indices: (file: 1 frame: 11), (file: 1 frame: 18) ----------
可以看到这个是从第一个视频截取、然后第二个...那么可以按照如下进行shuffle:
vl = de.VideoLoader(videos, ctx=ctx, shape=shape, interval=interval, skip=skip, shuffle=2) print(‘num batches:‘, len(vl)) disp_batches(vl, 5)
num batches: 8 batch data shape: (2, 480, 640, 3) indices: (file: 1 frame: 33), (file: 1 frame: 40) # file1中截取 ---------- batch data shape: (2, 480, 640, 3) indices: (file: 1 frame: 22), (file: 1 frame: 29) ---------- batch data shape: (2, 480, 640, 3) indices: (file: 0 frame: 11), (file: 0 frame: 18) # file0中截取 ---------- batch data shape: (2, 480, 640, 3) indices: (file: 1 frame: 44), (file: 1 frame: 51) ---------- batch data shape: (2, 480, 640, 3) indices: (file: 1 frame: 11), (file: 1 frame: 18) ----------
可以看到已经不是按照视频顺序截取了。
原文:https://www.cnblogs.com/king-lps/p/13062801.html