From 7294e405fe0a6c0ddc63295ede35c78fc28d93c1 Mon Sep 17 00:00:00 2001
From: Paul Tunison
Date: Mon, 2 Dec 2024 12:33:34 -0500
Subject: [PATCH] Added ability to configure constant dilation amount

This includes setting the constant to 1, which effectively disables
dilation altogether.
---
 .../m2/feat_locsconfs_residualLinear.yaml |  5 +-
 .../m3/feat_locsconfs_residualLinear.yaml |  5 +-
 tcn_hpl/models/components/ms_tcs_net.py   | 60 +++++++++++++++++--
 3 files changed, 58 insertions(+), 12 deletions(-)

diff --git a/configs/experiment/m2/feat_locsconfs_residualLinear.yaml b/configs/experiment/m2/feat_locsconfs_residualLinear.yaml
index 911c74eb9..d14e19794 100644
--- a/configs/experiment/m2/feat_locsconfs_residualLinear.yaml
+++ b/configs/experiment/m2/feat_locsconfs_residualLinear.yaml
@@ -45,15 +45,12 @@ model:
     factor: 0.9
     patience: 10
   net:
-    _target_: tcn_hpl.models.components.ms_tcs_net.MultiStageModelResidual
+    _target_: tcn_hpl.models.components.ms_tcs_net.MultiStageModel2
     # Utilize "linear" dilation layers instead of exponential ones.
     linear_single_stage: true
     # 12 here allows for the largest residual layer to cover the first, middle
     # and last window index when in the middle of its convolutional sweep.
     num_layers: 12
-    # These do_* booleans match the behavior of the original MultiStageModule.
-    do_stage_softmax: true
-    do_stage_residual: false
     # Length of feature vector for a single frame.
     # Currently derived from the parameterization of dataset vectorizer.
     dim: 102

diff --git a/configs/experiment/m3/feat_locsconfs_residualLinear.yaml b/configs/experiment/m3/feat_locsconfs_residualLinear.yaml
index 9c8c8545c..a75505a64 100644
--- a/configs/experiment/m3/feat_locsconfs_residualLinear.yaml
+++ b/configs/experiment/m3/feat_locsconfs_residualLinear.yaml
@@ -45,15 +45,12 @@ model:
     factor: 0.9
     patience: 10
   net:
-    _target_: tcn_hpl.models.components.ms_tcs_net.MultiStageModelResidual
+    _target_: tcn_hpl.models.components.ms_tcs_net.MultiStageModel2
     # Utilize "linear" dilation layers instead of exponential ones.
     linear_single_stage: true
     # 12 here allows for the largest residual layer to cover the first, middle
     # and last window index when in the middle of its convolutional sweep.
     num_layers: 12
-    # These do_* booleans match the behavior of the original MultiStageModule.
-    do_stage_softmax: true
-    do_stage_residual: false
     # Length of feature vector for a single frame.
     # Currently derived from the parameterization of dataset vectorizer.
     dim: 97

diff --git a/tcn_hpl/models/components/ms_tcs_net.py b/tcn_hpl/models/components/ms_tcs_net.py
index 538c5922d..a756eeb5b 100644
--- a/tcn_hpl/models/components/ms_tcs_net.py
+++ b/tcn_hpl/models/components/ms_tcs_net.py
@@ -1,4 +1,5 @@
 import copy
+import functools
 from typing import Sequence

 import einops
@@ -69,7 +70,7 @@ def forward(self, x, mask):
         return outputs


-class MultiStageModelResidual(nn.Module):
+class MultiStageModel2(nn.Module):
     """
     Similar to the MultiStageModel class, however the output of each stage
     after the first is added to the output of the previous.
@@ -84,12 +85,17 @@ def __init__(
         num_f_maps: int,
         dim: int,
         num_classes: int,
-        linear_single_stage: bool = True,
-        do_stage_residual: bool = True,
-        do_stage_softmax: bool = False,
+        linear_single_stage: bool = False,
+        constant_single_stage: bool = False,
+        do_stage_residual: bool = False,
+        do_stage_softmax: bool = True,
+        constant_stage_dilation: int = 1,
     ):
         """Initialize a `MultiStageModel` module.

+        Default values match the behavior of the original `MultiStageModel`
+        implementation.
+
         :param fc_sequence_dims: Create N*2 linear layers with u-net-like
             skip connections connecting inputs and outputs of the same
             dimensions. If an empty sequence is provided, then no FC layers are created
@@ -102,21 +108,34 @@ def __init__(
         :param num_classes: Number of output classes.
         :param linear_single_stage: Use `SingleStageModelLinear` class for
             single stage layers, otherwise use `SingleStageModel`.
+        :param constant_single_stage: Use `SingleStageModelConstant` class for
+            single stage layers, otherwise use `SingleStageModel`.
         :param do_stage_residual: Enable adding previous stage output to
             successive stage outputs. Default True.
         :param do_stage_softmax: Enable performing a softmax operation on
             previous stage outputs before input to successive stages. This
             only affects the input to a stage, and does not affect the
             optional residual stage addition via `do_stage_residual`.
             Default False.
+        :param constant_stage_dilation: If `constant_single_stage` is selected,
+            then use this value for the dilation amount. This parameter is not
+            used otherwise.
         """
         super().__init__()

         # One FC sequence that is applied to a single frame's feature vector,
         self.frame_fc = LinearSkipBlock([dim] + list(fc_sequence_dims), fc_sequence_dropout_p)

+        if sum([linear_single_stage, constant_single_stage]) > 1:
+            raise ValueError("Only one of the stage class selectors may be "
+                             "specified at a time.")
         stage_class = SingleStageModel
         if linear_single_stage:
             stage_class = SingleStageModelLinear
+        if constant_single_stage:
+            stage_class = functools.partial(
+                SingleStageModelConstant,
+                dilation_amt=constant_stage_dilation,
+            )
         self.stage1 = stage_class(num_layers, num_f_maps, dim, num_classes)
         self.stages = nn.ModuleList(
@@ -281,6 +300,39 @@ def forward(self, x, mask):

         return out


+class SingleStageModelConstant(nn.Module):
+    """
+    Version of the SingleStageModel but where the amount of dilation in
+    successive layers is constant.
+
+    The default dilation amount of 1 is the same as saying "don't dilate",
+    as it results in no dilation in the 3x1 convolution layers.
+
+    Input to the forward method should be of shape (batch, dim, window_size).
+    """
+    def __init__(self, num_layers, num_f_maps, dim, num_classes, dilation_amt=1):
+        super().__init__()
+        self.conv_1x1 = nn.Conv1d(dim, num_f_maps, 1)
+        self.layers = nn.ModuleList(
+            [
+                copy.deepcopy(DilatedResidualLayer(dilation_amt, num_f_maps, num_f_maps))
+                for _ in range(num_layers)
+            ]
+        )
+        self.conv_out = nn.Conv1d(num_f_maps, num_classes, 1)
+
+    def forward(self, x, mask):
+
+        out = self.conv_1x1(x)
+        for layer in self.layers:
+            # The DR layers already add their output to their input, so no
+            # need to do that here again.
+            out = layer(out, mask)
+        out = self.conv_out(out) * mask[:, None, :]
+
+        return out
+
+
 class DilatedResidualLayer(nn.Module):
     def __init__(self, dilation, in_channels, out_channels):
         super(DilatedResidualLayer, self).__init__()
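
Reviewer note: a minimal smoke test of the new constant-dilation stage, as
a sketch rather than part of the patch. It assumes the patch is applied and
`tcn_hpl` is importable; `num_f_maps=64` and `num_classes=6` are hypothetical
stand-ins not specified by this change, and the 2D `(batch, window)` mask
shape is inferred from the `mask[:, None, :]` broadcast in `forward`.

    import torch

    from tcn_hpl.models.components.ms_tcs_net import SingleStageModelConstant

    batch, window = 2, 25
    dim = 102  # per-frame feature length, matching the m2 config above

    stage = SingleStageModelConstant(
        num_layers=12,   # matches num_layers in the configs above
        num_f_maps=64,   # hypothetical feature-map width
        dim=dim,
        num_classes=6,   # hypothetical class count
        dilation_amt=1,  # 1 == undilated 3x1 convolutions
    )

    x = torch.randn(batch, dim, window)  # (batch, dim, window_size)
    mask = torch.ones(batch, window)     # per-frame validity mask
    out = stage(x, mask)
    print(out.shape)  # expected: (batch, num_classes, window_size)

With `dilation_amt=1`, each 3x1 layer grows the receptive field by a constant
2 frames, so 12 layers span 25 frames, in contrast to the exponential-dilation
`SingleStageModel` the docstrings compare against. To select this path from an
experiment config, the `net` block would set `constant_single_stage: true` and
a `constant_stage_dilation` value in place of the `linear_single_stage: true`
shown above.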