Comment modifications

Imageomics · Dec 21, 2024 · 19b2878 · 19b2878
1 parent 7a313c8
commit 19b2878
Show file tree

Hide file tree

Showing 7 changed files with 89 additions and 5 deletions.
diff --git a/src/kabr_tools/utils/slowfast/cfg.py b/src/kabr_tools/utils/slowfast/cfg.py
@@ -1,4 +1,6 @@
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+# add code from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/config/defaults.py except custom_config
+# add + simplify load_config function from https://github.com/facebookresearch/SlowFast/blob/88bb4c9cf03fd97fce403f8b475543cb8e8ca5ea/slowfast/utils/parser.py
 
 """Configs."""
 import math
@@ -1282,10 +1284,16 @@ def assert_and_infer_cfg(cfg):
 
 
 def get_cfg():
+    """
+    Get a copy of the default config.
+    """
     return _C.clone()
 
 
 def load_config(path_to_config=None):
+    """
+    Given the arguments, load and initialize the configs.
+    """
     # Setup cfg.
     cfg = get_cfg()
 

diff --git a/src/kabr_tools/utils/slowfast/head.py b/src/kabr_tools/utils/slowfast/head.py
@@ -1,4 +1,5 @@
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+# X3DHead from https://github.com/facebookresearch/SlowFast/blob/88bb4c9cf03fd97fce403f8b475543cb8e8ca5ea/slowfast/models/head_helper.py
 
 """ResNe(X)t Head helper."""
 
@@ -65,7 +66,6 @@ def __init__(
         self._construct_head(dim_in, dim_inner, dim_out, norm_module)
 
     def _construct_head(self, dim_in, dim_inner, dim_out, norm_module):
-
         self.conv_5 = nn.Conv3d(
             dim_in,
             dim_inner,
@@ -111,8 +111,7 @@ def _construct_head(self, dim_in, dim_inner, dim_out, norm_module):
             self.act = nn.Sigmoid()
         else:
             raise NotImplementedError(
-                "{} is not supported as an activation" "function.".format(
-                    self.act_func)
+                "{} is not supported as an activation" "function.".format(self.act_func)
             )
 
     def forward(self, inputs):

diff --git a/src/kabr_tools/utils/slowfast/norm.py b/src/kabr_tools/utils/slowfast/norm.py
@@ -1,4 +1,5 @@
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+# code from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/batchnorm_helper.py with unused import (NaiveSyncBatchNorm1d) removed
 
 """BatchNorm (BN) utility functions and custom batch-size BN implementations"""
 

diff --git a/src/kabr_tools/utils/slowfast/resnet.py b/src/kabr_tools/utils/slowfast/resnet.py
@@ -1,4 +1,8 @@
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+# drop_path from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/common.py
+# Nonlocal from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/nonlocal_helper.py
+# SE, Swish from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/operators.py
+# remaining code from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/resnet_helper.py without slowfast imports
 
 """Video models."""
 

diff --git a/src/kabr_tools/utils/slowfast/stem.py b/src/kabr_tools/utils/slowfast/stem.py
@@ -1,4 +1,6 @@
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+# code from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/stem_helper.py
+# unused torch import removed
 
 """ResNe(X)t 3D stem helper."""
 

diff --git a/src/kabr_tools/utils/slowfast/utils.py b/src/kabr_tools/utils/slowfast/utils.py
@@ -1,5 +1,8 @@
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-# https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/visualization/ava_demo_precomputed_boxes.py
+# get_sequence, pack_pathway_output, tensor_normalize from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/datasets/utils.py
+# scale from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/datasets/cv2_transform.py
+# process_cv2_inputs from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/visualization/utils.py
+# get_input_clip from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/visualization/ava_demo_precomputed_boxes.py
 
 import math
 import cv2
@@ -9,6 +12,18 @@
 
 
 def get_sequence(center_idx, half_len, sample_rate, num_frames):
+    """
+    Sample frames from the corresponding clip.
+
+    Args:
+        center_idx (int): center frame idx for current clip
+        half_len (int): half of the clip length
+        sample_rate (int): sampling rate for sampling frames inside of the clip
+        num_frames (int): number of expected sampled frames
+
+    Returns:
+        seq (list): list of indexes of sampled frames in this clip.
+    """
     seq = list(range(center_idx - half_len, center_idx + half_len, sample_rate))
 
     for seq_idx in range(len(seq)):
@@ -20,6 +35,16 @@ def get_sequence(center_idx, half_len, sample_rate, num_frames):
 
 
 def scale(size, image):
+    """
+    Scale the short side of the image to size.
+    Args:
+        size (int): size to scale the image.
+        image (array): image to perform short side scale. Dimension is
+            `height` x `width` x `channel`.
+    Returns:
+        (ndarray): the scaled image with dimension of
+            `height` x `width` x `channel`.
+    """
     height = image.shape[0]
     width = image.shape[1]
     if (width <= height and width == size) or (height <= width and height == size):
@@ -36,6 +61,14 @@ def scale(size, image):
 
 
 def process_cv2_inputs(frames, cfg):
+    """
+    Normalize and prepare inputs as a list of tensors. Each tensor
+    corresponds to a unique pathway.
+    Args:
+        frames (list of array): list of input images (corresponding to one clip) in range [0, 255].
+        cfg (CfgNode): configs. Details can be found in
+            slowfast/config/defaults.py
+    """
     inputs = torch.from_numpy(np.array(frames)).float() / 255
     inputs = tensor_normalize(inputs, cfg.DATA.MEAN, cfg.DATA.STD)
     # T H W C -> C T H W.
@@ -49,6 +82,13 @@ def process_cv2_inputs(frames, cfg):
 
 
 def tensor_normalize(tensor, mean, std, func=None):
+    """
+    Normalize a given tensor by subtracting the mean and dividing the std.
+    Args:
+        tensor (tensor): tensor to normalize.
+        mean (tensor or list): mean value to subtract.
+        std (tensor or list): std to divide.
+    """
     if tensor.dtype == torch.uint8:
         tensor = tensor.float()
         tensor = tensor / 255.0
@@ -64,6 +104,16 @@ def tensor_normalize(tensor, mean, std, func=None):
 
 
 def pack_pathway_output(cfg, frames):
+    """
+    Prepare output as a list of tensors. Each tensor corresponds to a
+    unique pathway.
+    Args:
+        frames (tensor): frames of images sampled from the video. The
+            dimension is `channel` x `num frames` x `height` x `width`.
+    Returns:
+        frame_list (list): list of tensors with the dimension of
+            `channel` x `num frames` x `height` x `width`.
+    """
     if cfg.DATA.REVERSE_INPUT_CHANNEL:
         frames = frames[[2, 1, 0], :, :, :]
     if cfg.MODEL.ARCH in cfg.MODEL.SINGLE_PATHWAY_ARCH:
@@ -90,7 +140,15 @@ def pack_pathway_output(cfg, frames):
 
 
 def get_input_clip(cap: cv2.VideoCapture, cfg, keyframe_idx: int) -> list[Tensor]:
-
+    """
+    Get input clip from the video/folder of images for a given
+    keyframe index.
+    Args:
+        keyframe_idx (int): index of the current keyframe.
+    Returns:
+        clip (list of tensors): formatted input clip(s) corresponding to
+            the current keyframe.
+    """
     seq_length = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
     seq = get_sequence(

diff --git a/src/kabr_tools/utils/slowfast/x3d.py b/src/kabr_tools/utils/slowfast/x3d.py
@@ -1,4 +1,9 @@
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+# round_width from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/utils.py
+# init_weights from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/utils/weight_init_helper.py
+# _POOL1, _TEMPORAL_KERNEL_BASIS, _MODEL_STAGE_DEPTH, X3D from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/video_model_builder.py
+# simplify build_model from https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/models/build.py
+# replace slowfast imports with local imports
 
 import math
 import torch
@@ -298,6 +303,13 @@ def forward(self, x, bboxes=None):
 
 
 def build_model(cfg, gpu_id=None):
+    """
+    Builds the video model.
+    Args:
+        cfg (configs): configs that contain the hyper-parameters to build the
+        backbone. Details can be seen in slowfast/config/defaults.py.
+        gpu_id (Optional[int]): specify the gpu index to build model.
+    """
     if torch.cuda.is_available():
         assert (
             cfg.NUM_GPUS <= torch.cuda.device_count()