Skip to content

Commit

Permalink
Comment modifications
Browse files Browse the repository at this point in the history
  • Loading branch information
zhong-al committed Dec 21, 2024
1 parent 7a313c8 commit 19b2878
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 5 deletions.
8 changes: 8 additions & 0 deletions src/kabr_tools/utils/slowfast/cfg.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# add code from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/config/defaults.py except custom_config
# add + simplify load_config function from https://github.com/facebookresearch/SlowFast/blob/88bb4c9cf03fd97fce403f8b475543cb8e8ca5ea/slowfast/utils/parser.py

"""Configs."""
import math
Expand Down Expand Up @@ -1282,10 +1284,16 @@ def assert_and_infer_cfg(cfg):


def get_cfg():
    """
    Return a clone of the module-level default config ``_C``.
    """
    default_cfg = _C.clone()
    return default_cfg


def load_config(path_to_config=None):
"""
Given the arguments, load and initialize the configs.
"""
# Setup cfg.
cfg = get_cfg()

Expand Down
5 changes: 2 additions & 3 deletions src/kabr_tools/utils/slowfast/head.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# X3DHead from https://github.com/facebookresearch/SlowFast/blob/88bb4c9cf03fd97fce403f8b475543cb8e8ca5ea/slowfast/models/head_helper.py

"""ResNe(X)t Head helper."""

Expand Down Expand Up @@ -65,7 +66,6 @@ def __init__(
self._construct_head(dim_in, dim_inner, dim_out, norm_module)

def _construct_head(self, dim_in, dim_inner, dim_out, norm_module):

self.conv_5 = nn.Conv3d(
dim_in,
dim_inner,
Expand Down Expand Up @@ -111,8 +111,7 @@ def _construct_head(self, dim_in, dim_inner, dim_out, norm_module):
self.act = nn.Sigmoid()
else:
raise NotImplementedError(
"{} is not supported as an activation" "function.".format(
self.act_func)
"{} is not supported as an activation" "function.".format(self.act_func)
)

def forward(self, inputs):
Expand Down
1 change: 1 addition & 0 deletions src/kabr_tools/utils/slowfast/norm.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# code from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/batchnorm_helper.py with unused import (NaiveSyncBatchNorm1d) removed

"""BatchNorm (BN) utility functions and custom batch-size BN implementations"""

Expand Down
4 changes: 4 additions & 0 deletions src/kabr_tools/utils/slowfast/resnet.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# drop_path from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/common.py
# Nonlocal from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/nonlocal_helper.py
# SE, Swish from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/operators.py
# remaining code from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/resnet_helper.py without slowfast imports

"""Video models."""

Expand Down
2 changes: 2 additions & 0 deletions src/kabr_tools/utils/slowfast/stem.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# code from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/stem_helper.py
# unused torch import removed

"""ResNe(X)t 3D stem helper."""

Expand Down
62 changes: 60 additions & 2 deletions src/kabr_tools/utils/slowfast/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/visualization/ava_demo_precomputed_boxes.py
# get_sequence, pack_pathway_output, tensor_normalize from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/datasets/utils.py
# scale from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/datasets/cv2_transform.py
# process_cv2_inputs from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/visualization/utils.py
# get_input_clip from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/visualization/ava_demo_precomputed_boxes.py

import math
import cv2
Expand All @@ -9,6 +12,18 @@


def get_sequence(center_idx, half_len, sample_rate, num_frames):
"""
Sample frames among the corresponding clip.
Args:
center_idx (int): center frame idx for current clip
half_len (int): half of the clip length
sample_rate (int): sampling rate for sampling frames inside of the clip
num_frames (int): number of expected sampled frames
Returns:
seq (list): list of indexes of sampled frames in this clip.
"""
seq = list(range(center_idx - half_len, center_idx + half_len, sample_rate))

for seq_idx in range(len(seq)):
Expand All @@ -20,6 +35,16 @@ def get_sequence(center_idx, half_len, sample_rate, num_frames):


def scale(size, image):
"""
Scale the short side of the image to size.
Args:
size (int): size to scale the image.
image (array): image to perform short side scale. Dimension is
`height` x `width` x `channel`.
Returns:
(ndarray): the scaled image with dimension of
`height` x `width` x `channel`.
"""
height = image.shape[0]
width = image.shape[1]
if (width <= height and width == size) or (height <= width and height == size):
Expand All @@ -36,6 +61,14 @@ def scale(size, image):


def process_cv2_inputs(frames, cfg):
"""
Normalize and prepare inputs as a list of tensors. Each tensor
corresponds to a unique pathway.
Args:
frames (list of array): list of input images (corresponding to one clip) in range [0, 255].
cfg (CfgNode): configs. Details can be found in
slowfast/config/defaults.py
"""
inputs = torch.from_numpy(np.array(frames)).float() / 255
inputs = tensor_normalize(inputs, cfg.DATA.MEAN, cfg.DATA.STD)
# T H W C -> C T H W.
Expand All @@ -49,6 +82,13 @@ def process_cv2_inputs(frames, cfg):


def tensor_normalize(tensor, mean, std, func=None):
"""
Normalize a given tensor by subtracting the mean and dividing the std.
Args:
tensor (tensor): tensor to normalize.
mean (tensor or list): mean value to subtract.
std (tensor or list): std to divide.
"""
if tensor.dtype == torch.uint8:
tensor = tensor.float()
tensor = tensor / 255.0
Expand All @@ -64,6 +104,16 @@ def tensor_normalize(tensor, mean, std, func=None):


def pack_pathway_output(cfg, frames):
"""
Prepare output as a list of tensors. Each tensor corresponds to a
unique pathway.
Args:
frames (tensor): frames of images sampled from the video. The
dimension is `channel` x `num frames` x `height` x `width`.
Returns:
frame_list (list): list of tensors with the dimension of
`channel` x `num frames` x `height` x `width`.
"""
if cfg.DATA.REVERSE_INPUT_CHANNEL:
frames = frames[[2, 1, 0], :, :, :]
if cfg.MODEL.ARCH in cfg.MODEL.SINGLE_PATHWAY_ARCH:
Expand All @@ -90,7 +140,15 @@ def pack_pathway_output(cfg, frames):


def get_input_clip(cap: cv2.VideoCapture, cfg, keyframe_idx: int) -> list[Tensor]:

"""
Get input clip from the video/folder of images for a given
keyframe index.
Args:
keyframe_idx (int): index of the current keyframe.
Returns:
clip (list of tensors): formatted input clip(s) corresponding to
the current keyframe.
"""
seq_length = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
seq = get_sequence(
Expand Down
12 changes: 12 additions & 0 deletions src/kabr_tools/utils/slowfast/x3d.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# round_width from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/utils.py
# init_weights from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/utils/weight_init_helper.py
# _POOL1, _TEMPORAL_KERNEL_BASIS, _MODEL_STAGE_DEPTH, X3D from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/video_model_builder.py
# simplify build_model from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/build.py
# replace slowfast imports with local imports

import math
import torch
Expand Down Expand Up @@ -298,6 +303,13 @@ def forward(self, x, bboxes=None):


def build_model(cfg, gpu_id=None):
"""
Builds the video model.
Args:
cfg (configs): configs that contain the hyper-parameters to build the
backbone. Details can be seen in slowfast/config/defaults.py.
gpu_id (Optional[int]): specify the gpu index to build model.
"""
if torch.cuda.is_available():
assert (
cfg.NUM_GPUS <= torch.cuda.device_count()
Expand Down

0 comments on commit 19b2878

Please sign in to comment.