diff --git a/src/kabr_tools/utils/slowfast/cfg.py b/src/kabr_tools/utils/slowfast/cfg.py index 6ef6c20..c89fb35 100644 --- a/src/kabr_tools/utils/slowfast/cfg.py +++ b/src/kabr_tools/utils/slowfast/cfg.py @@ -1,4 +1,6 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# add code from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/config/defaults.py except custom_config +# add + simplify load_config function from https://github.com/facebookresearch/SlowFast/blob/88bb4c9cf03fd97fce403f8b475543cb8e8ca5ea/slowfast/utils/parser.py """Configs.""" import math @@ -1282,10 +1284,16 @@ def assert_and_infer_cfg(cfg): def get_cfg(): + """ + Get a copy of the default config. + """ return _C.clone() def load_config(path_to_config=None): + """ + Given the arguments, load and initialize the configs. + """ # Setup cfg. cfg = get_cfg() diff --git a/src/kabr_tools/utils/slowfast/head.py b/src/kabr_tools/utils/slowfast/head.py index c16906c..999068f 100644 --- a/src/kabr_tools/utils/slowfast/head.py +++ b/src/kabr_tools/utils/slowfast/head.py @@ -1,4 +1,5 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
+# X3DHead from https://github.com/facebookresearch/SlowFast/blob/88bb4c9cf03fd97fce403f8b475543cb8e8ca5ea/slowfast/models/head_helper.py """ResNe(X)t Head helper.""" @@ -65,7 +66,6 @@ def __init__( self._construct_head(dim_in, dim_inner, dim_out, norm_module) def _construct_head(self, dim_in, dim_inner, dim_out, norm_module): - self.conv_5 = nn.Conv3d( dim_in, dim_inner, @@ -111,8 +111,7 @@ def _construct_head(self, dim_in, dim_inner, dim_out, norm_module): self.act = nn.Sigmoid() else: raise NotImplementedError( - "{} is not supported as an activation" "function.".format( - self.act_func) + "{} is not supported as an activation" "function.".format(self.act_func) ) def forward(self, inputs): diff --git a/src/kabr_tools/utils/slowfast/norm.py b/src/kabr_tools/utils/slowfast/norm.py index 34fd479..4558a47 100644 --- a/src/kabr_tools/utils/slowfast/norm.py +++ b/src/kabr_tools/utils/slowfast/norm.py @@ -1,4 +1,5 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# code from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/batchnorm_helper.py with unused import (NaiveSyncBatchNorm1d) removed """BatchNorm (BN) utility functions and custom batch-size BN implementations""" diff --git a/src/kabr_tools/utils/slowfast/resnet.py b/src/kabr_tools/utils/slowfast/resnet.py index d98c0f2..79a1387 100644 --- a/src/kabr_tools/utils/slowfast/resnet.py +++ b/src/kabr_tools/utils/slowfast/resnet.py @@ -1,4 +1,8 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
+# drop_path from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/common.py +# Nonlocal from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/nonlocal_helper.py +# SE, Swish from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/operators.py +# remaining code from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/resnet_helper.py without slowfast imports """Video models.""" diff --git a/src/kabr_tools/utils/slowfast/stem.py b/src/kabr_tools/utils/slowfast/stem.py index beda2ec..d10d588 100644 --- a/src/kabr_tools/utils/slowfast/stem.py +++ b/src/kabr_tools/utils/slowfast/stem.py @@ -1,4 +1,6 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# code from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/stem_helper.py +# unused torch import removed """ResNe(X)t 3D stem helper.""" diff --git a/src/kabr_tools/utils/slowfast/utils.py b/src/kabr_tools/utils/slowfast/utils.py index ec3408d..111aa84 100644 --- a/src/kabr_tools/utils/slowfast/utils.py +++ b/src/kabr_tools/utils/slowfast/utils.py @@ -1,5 +1,8 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
-# https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/visualization/ava_demo_precomputed_boxes.py +# get_sequence, pack_pathway_output, tensor_normalize from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/datasets/utils.py +# scale from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/datasets/cv2_transform.py +# process_cv2_inputs from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/visualization/utils.py +# get_input_clip from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/visualization/ava_demo_precomputed_boxes.py import math import cv2 @@ -9,6 +12,18 @@ def get_sequence(center_idx, half_len, sample_rate, num_frames): + """ + Sample frames among the corresponding clip. + + Args: + center_idx (int): center frame idx for current clip + half_len (int): half of the clip length + sample_rate (int): sampling rate for sampling frames inside of the clip + num_frames (int): number of expected sampled frames + + Returns: + seq (list): list of indexes of sampled frames in this clip. + """ seq = list(range(center_idx - half_len, center_idx + half_len, sample_rate)) for seq_idx in range(len(seq)): @@ -20,6 +35,16 @@ def get_sequence(center_idx, half_len, sample_rate, num_frames): def scale(size, image): + """ + Scale the short side of the image to size. + Args: + size (int): size to scale the image. + image (array): image to perform short side scale. Dimension is + `height` x `width` x `channel`. + Returns: + (ndarray): the scaled image with dimension of + `height` x `width` x `channel`. 
+ """ height = image.shape[0] width = image.shape[1] if (width <= height and width == size) or (height <= width and height == size): @@ -36,6 +61,14 @@ def scale(size, image): def process_cv2_inputs(frames, cfg): + """ + Normalize and prepare inputs as a list of tensors. Each tensor + corresponds to a unique pathway. + Args: + frames (list of array): list of input images (corresponding to one clip) in range [0, 255]. + cfg (CfgNode): configs. Details can be found in + slowfast/config/defaults.py + """ inputs = torch.from_numpy(np.array(frames)).float() / 255 inputs = tensor_normalize(inputs, cfg.DATA.MEAN, cfg.DATA.STD) # T H W C -> C T H W. @@ -49,6 +82,13 @@ def process_cv2_inputs(frames, cfg): def tensor_normalize(tensor, mean, std, func=None): + """ + Normalize a given tensor by subtracting the mean and dividing the std. + Args: + tensor (tensor): tensor to normalize. + mean (tensor or list): mean value to subtract. + std (tensor or list): std to divide. + """ if tensor.dtype == torch.uint8: tensor = tensor.float() tensor = tensor / 255.0 @@ -64,6 +104,16 @@ def tensor_normalize(tensor, mean, std, func=None): def pack_pathway_output(cfg, frames): + """ + Prepare output as a list of tensors. Each tensor corresponds to a + unique pathway. + Args: + frames (tensor): frames of images sampled from the video. The + dimension is `channel` x `num frames` x `height` x `width`. + Returns: + frame_list (list): list of tensors with the dimension of + `channel` x `num frames` x `height` x `width`. + """ if cfg.DATA.REVERSE_INPUT_CHANNEL: frames = frames[[2, 1, 0], :, :, :] if cfg.MODEL.ARCH in cfg.MODEL.SINGLE_PATHWAY_ARCH: @@ -90,7 +140,15 @@ def pack_pathway_output(cfg, frames): def get_input_clip(cap: cv2.VideoCapture, cfg, keyframe_idx: int) -> list[Tensor]: - + """ + Get input clip from the video/folder of images for a given + keyframe index. + Args: + keyframe_idx (int): index of the current keyframe. 
+ Returns: + clip (list of tensors): formatted input clip(s) corresponding to + the current keyframe. + """ seq_length = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) seq = get_sequence( diff --git a/src/kabr_tools/utils/slowfast/x3d.py b/src/kabr_tools/utils/slowfast/x3d.py index 628f51e..483bb3c 100644 --- a/src/kabr_tools/utils/slowfast/x3d.py +++ b/src/kabr_tools/utils/slowfast/x3d.py @@ -1,4 +1,9 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# round_width from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/utils.py +# init_weights from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/utils/weight_init_helper.py +# _POOL1, _TEMPORAL_KERNEL_BASIS, _MODEL_STAGE_DEPTH, X3D from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/video_model_builder.py +# simplify build_model from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/build.py +# replace slowfast imports with local imports import math import torch @@ -298,6 +303,13 @@ def forward(self, x, bboxes=None): def build_model(cfg, gpu_id=None): + """ + Builds the video model. + Args: + cfg (configs): configs that contains the hyper-parameters to build the + backbone. Details can be seen in slowfast/config/defaults.py. + gpu_id (Optional[int]): specify the gpu index to build model. + """ if torch.cuda.is_available(): assert ( cfg.NUM_GPUS <= torch.cuda.device_count()