open-mmlab · Xiangxu-0103 · Aug 28, 2023 · Aug 29, 2023 · Aug 29, 2023 · Aug 29, 2023
diff --git a/configs/_base_/models/cylinder3d.py b/configs/_base_/models/cylinder3d.py
@@ -1,24 +1,24 @@
 grid_shape = [480, 360, 32]
+point_cloud_range = [0, -3.14159265359, -4, 50, 3.14159265359, 2]
 model = dict(
-    type='Cylinder3D',
+    type='VoxelSegmentor',
     data_preprocessor=dict(
         type='Det3DDataPreprocessor',
         voxel=True,
         voxel_type='cylindrical',
         voxel_layer=dict(
             grid_shape=grid_shape,
-            point_cloud_range=[0, -3.14159265359, -4, 50, 3.14159265359, 2],
+            point_cloud_range=point_cloud_range,
             max_num_points=-1,
-            max_voxels=-1,
-        ),
-    ),
+            max_voxels=-1)),
     voxel_encoder=dict(
         type='SegVFE',
-        feat_channels=[64, 128, 256, 256],
         in_channels=6,
+        feat_channels=[64, 128, 256, 256],
         with_voxel_center=True,
-        feat_compression=16,
-        return_point_feats=False),
+        grid_shape=grid_shape,
+        point_cloud_range=point_cloud_range,
+        feat_compression=16),
     backbone=dict(
         type='Asymm3DSpconv',
         grid_size=grid_shape,
@@ -29,13 +29,14 @@
         type='Cylinder3DHead',
         channels=128,
         num_classes=20,
+        dropout_ratio=0,
         loss_ce=dict(
             type='mmdet.CrossEntropyLoss',
             use_sigmoid=False,
             class_weight=None,
             loss_weight=1.0),
         loss_lovasz=dict(type='LovaszLoss', loss_weight=1.0, reduction='none'),
-    ),
+        conv_seg_kernel_size=3,
+        ignore_index=19),
     train_cfg=None,
-    test_cfg=dict(mode='whole'),
-)
+    test_cfg=dict(mode='whole'))
diff --git a/configs/_base_/models/dgcnn.py b/configs/_base_/models/dgcnn.py
@@ -19,7 +19,7 @@
         conv_cfg=dict(type='Conv1d'),
         norm_cfg=dict(type='BN1d'),
         act_cfg=dict(type='LeakyReLU', negative_slope=0.2),
-        loss_decode=dict(
+        loss_ce=dict(
             type='mmdet.CrossEntropyLoss',
             use_sigmoid=False,
             class_weight=None,  # modified with dataset

diff --git a/configs/_base_/models/minkunet.py b/configs/_base_/models/minkunet.py
@@ -1,5 +1,5 @@
 model = dict(
-    type='MinkUNet',
+    type='VoxelSegmentor',
     data_preprocessor=dict(
         type='Det3DDataPreprocessor',
         voxel=True,
@@ -26,8 +26,9 @@
         type='MinkUNetHead',
         channels=96,
         num_classes=19,
+        batch_first=False,
         dropout_ratio=0,
-        loss_decode=dict(type='mmdet.CrossEntropyLoss', avg_non_ignore=True),
+        loss_ce=dict(type='mmdet.CrossEntropyLoss', avg_non_ignore=True),
         ignore_index=19),
     train_cfg=dict(),
     test_cfg=dict())
diff --git a/configs/_base_/models/paconv_ssg.py b/configs/_base_/models/paconv_ssg.py
@@ -37,7 +37,7 @@
         conv_cfg=dict(type='Conv1d'),
         norm_cfg=dict(type='BN1d'),
         act_cfg=dict(type='ReLU'),
-        loss_decode=dict(
+        loss_ce=dict(
             type='mmdet.CrossEntropyLoss',
             use_sigmoid=False,
             class_weight=None,  # should be modified with dataset

diff --git a/configs/_base_/models/pointnet2_ssg.py b/configs/_base_/models/pointnet2_ssg.py
@@ -26,7 +26,7 @@
         conv_cfg=dict(type='Conv1d'),
         norm_cfg=dict(type='BN1d'),
         act_cfg=dict(type='ReLU'),
-        loss_decode=dict(
+        loss_ce=dict(
             type='mmdet.CrossEntropyLoss',
             use_sigmoid=False,
             class_weight=None,  # should be modified with dataset

diff --git a/configs/_base_/models/spvcnn.py b/configs/_base_/models/spvcnn.py
@@ -1,5 +1,5 @@
 model = dict(
-    type='MinkUNet',
+    type='VoxelSegmentor',
     data_preprocessor=dict(
         type='Det3DDataPreprocessor',
         voxel=True,
@@ -27,8 +27,9 @@
         type='MinkUNetHead',
         channels=96,
         num_classes=19,
+        batch_first=False,
         dropout_ratio=0,
-        loss_decode=dict(type='mmdet.CrossEntropyLoss', avg_non_ignore=True),
+        loss_ce=dict(type='mmdet.CrossEntropyLoss', avg_non_ignore=True),
         ignore_index=19),
     train_cfg=dict(),
     test_cfg=dict())
diff --git a/configs/cylinder3d/cylinder3d_4xb4-3x_semantickitti.py b/configs/cylinder3d/cylinder3d_4xb4-3x_semantickitti.py
@@ -27,12 +27,12 @@
         gamma=0.1)
 ]
 
-train_dataloader = dict(batch_size=4, )
+train_dataloader = dict(batch_size=4)
 
 # Default setting for scaling LR automatically
 #   - `enable` means enable scaling LR automatically
 #       or not by default.
 #   - `base_batch_size` = (8 GPUs) x (4 samples per GPU).
 # auto_scale_lr = dict(enable=False, base_batch_size=32)
 
-default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=5))
+default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=1))
diff --git a/configs/dgcnn/dgcnn_4xb32-cosine-100e_s3dis-seg_test-area5.py b/configs/dgcnn/dgcnn_4xb32-cosine-100e_s3dis-seg_test-area5.py
@@ -8,7 +8,7 @@
     backbone=dict(in_channels=9),  # [xyz, rgb, normalized_xyz]
     decode_head=dict(
         num_classes=13, ignore_index=13,
-        loss_decode=dict(class_weight=None)),  # S3DIS doesn't use class_weight
+        loss_ce=dict(class_weight=None)),  # S3DIS doesn't use class_weight
     test_cfg=dict(
         num_points=4096,
         block_size=1.0,

diff --git a/configs/minkunet/minkunet18_w16_torchsparse_8xb2-amp-15e_semantickitti.py b/configs/minkunet/minkunet18_w16_torchsparse_8xb2-amp-15e_semantickitti.py
@@ -1,4 +1,4 @@
-_base_ = ['./minkunet_w32_8xb2-15e_semantickitti.py']
+_base_ = ['./minkunet18_w32_torchsparse_8xb2-amp-15e_semantickitti.py']
 
 model = dict(
     backbone=dict(

diff --git a/configs/minkunet/minkunet18_w20_torchsparse_8xb2-amp-15e_semantickitti.py b/configs/minkunet/minkunet18_w20_torchsparse_8xb2-amp-15e_semantickitti.py
@@ -1,4 +1,4 @@
-_base_ = ['./minkunet_w32_8xb2-15e_semantickitti.py']
+_base_ = ['./minkunet18_w32_torchsparse_8xb2-amp-15e_semantickitti.py']
 
 model = dict(
     backbone=dict(

diff --git a/configs/minkunet/minkunet34_w32_minkowski_8xb2-laser-polar-mix-3x_semantickitti.py b/configs/minkunet/minkunet34_w32_minkowski_8xb2-laser-polar-mix-3x_semantickitti.py
@@ -4,4 +4,5 @@
 
 model = dict(
     data_preprocessor=dict(batch_first=True),
-    backbone=dict(sparseconv_backend='minkowski'))
+    backbone=dict(sparseconv_backend='minkowski'),
+    decode_head=dict(batch_first=True))
diff --git a/configs/minkunet/minkunet34_w32_spconv_8xb2-amp-laser-polar-mix-3x_semantickitti.py b/configs/minkunet/minkunet34_w32_spconv_8xb2-amp-laser-polar-mix-3x_semantickitti.py
@@ -4,6 +4,7 @@
 
 model = dict(
     data_preprocessor=dict(batch_first=True),
-    backbone=dict(sparseconv_backend='spconv'))
+    backbone=dict(sparseconv_backend='spconv'),
+    decode_head=dict(batch_first=True))
 
 optim_wrapper = dict(type='AmpOptimWrapper', loss_scale='dynamic')
diff --git a/configs/minkunet/minkunet34_w32_spconv_8xb2-laser-polar-mix-3x_semantickitti.py b/configs/minkunet/minkunet34_w32_spconv_8xb2-laser-polar-mix-3x_semantickitti.py
@@ -4,4 +4,5 @@
 
 model = dict(
     data_preprocessor=dict(batch_first=True),
-    backbone=dict(sparseconv_backend='spconv'))
+    backbone=dict(sparseconv_backend='spconv'),
+    decode_head=dict(batch_first=True))
diff --git a/configs/paconv/paconv_ssg-cuda_8xb8-cosine-200e_s3dis-seg.py b/configs/paconv/paconv_ssg-cuda_8xb8-cosine-200e_s3dis-seg.py
@@ -7,7 +7,7 @@
 model = dict(
     decode_head=dict(
         num_classes=13, ignore_index=13,
-        loss_decode=dict(class_weight=None)),  # S3DIS doesn't use class_weight
+        loss_ce=dict(class_weight=None)),  # S3DIS doesn't use class_weight
     test_cfg=dict(
         num_points=4096,
         block_size=1.0,

diff --git a/configs/paconv/paconv_ssg_8xb8-cosine-150e_s3dis-seg.py b/configs/paconv/paconv_ssg_8xb8-cosine-150e_s3dis-seg.py
@@ -7,7 +7,7 @@
 model = dict(
     decode_head=dict(
         num_classes=13, ignore_index=13,
-        loss_decode=dict(class_weight=None)),  # S3DIS doesn't use class_weight
+        loss_ce=dict(class_weight=None)),  # S3DIS doesn't use class_weight
     test_cfg=dict(
         num_points=4096,
         block_size=1.0,

diff --git a/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg-xyz-only.py b/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg-xyz-only.py
@@ -13,7 +13,7 @@
         # `data/scannet/seg_info/train_label_weight.npy`
         # you can copy paste the values here, or input the file path as
         # `class_weight=data/scannet/seg_info/train_label_weight.npy`
-        loss_decode=dict(class_weight=[
+        loss_ce=dict(class_weight=[
             2.389689, 2.7215734, 4.5944676, 4.8543367, 4.096086, 4.907941,
             4.690836, 4.512031, 4.623311, 4.9242644, 5.358117, 5.360071,
             5.019636, 4.967126, 5.3502126, 5.4023647, 5.4027233, 5.4169416,

diff --git a/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py b/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py
@@ -12,7 +12,7 @@
         # `data/scannet/seg_info/train_label_weight.npy`
         # you can copy paste the values here, or input the file path as
         # `class_weight=data/scannet/seg_info/train_label_weight.npy`
-        loss_decode=dict(class_weight=[
+        loss_ce=dict(class_weight=[
             2.389689, 2.7215734, 4.5944676, 4.8543367, 4.096086, 4.907941,
             4.690836, 4.512031, 4.623311, 4.9242644, 5.358117, 5.360071,
             5.019636, 4.967126, 5.3502126, 5.4023647, 5.4027233, 5.4169416,

diff --git a/configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py b/configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py
@@ -8,7 +8,7 @@
     backbone=dict(in_channels=9),  # [xyz, rgb, normalized_xyz]
     decode_head=dict(
         num_classes=13, ignore_index=13,
-        loss_decode=dict(class_weight=None)),  # S3DIS doesn't use class_weight
+        loss_ce=dict(class_weight=None)),  # S3DIS doesn't use class_weight
     test_cfg=dict(
         num_points=4096,
         block_size=1.0,

diff --git a/configs/pointnet2/pointnet2_ssg_2xb16-cosine-200e_scannet-seg-xyz-only.py b/configs/pointnet2/pointnet2_ssg_2xb16-cosine-200e_scannet-seg-xyz-only.py
@@ -13,7 +13,7 @@
         # `data/scannet/seg_info/train_label_weight.npy`
         # you can copy paste the values here, or input the file path as
         # `class_weight=data/scannet/seg_info/train_label_weight.npy`
-        loss_decode=dict(class_weight=[
+        loss_ce=dict(class_weight=[
             2.389689, 2.7215734, 4.5944676, 4.8543367, 4.096086, 4.907941,
             4.690836, 4.512031, 4.623311, 4.9242644, 5.358117, 5.360071,
             5.019636, 4.967126, 5.3502126, 5.4023647, 5.4027233, 5.4169416,

diff --git a/configs/pointnet2/pointnet2_ssg_2xb16-cosine-200e_scannet-seg.py b/configs/pointnet2/pointnet2_ssg_2xb16-cosine-200e_scannet-seg.py
@@ -12,7 +12,7 @@
         # `data/scannet/seg_info/train_label_weight.npy`
         # you can copy paste the values here, or input the file path as
         # `class_weight=data/scannet/seg_info/train_label_weight.npy`
-        loss_decode=dict(class_weight=[
+        loss_ce=dict(class_weight=[
             2.389689, 2.7215734, 4.5944676, 4.8543367, 4.096086, 4.907941,
             4.690836, 4.512031, 4.623311, 4.9242644, 5.358117, 5.360071,
             5.019636, 4.967126, 5.3502126, 5.4023647, 5.4027233, 5.4169416,

diff --git a/configs/pointnet2/pointnet2_ssg_2xb16-cosine-50e_s3dis-seg.py b/configs/pointnet2/pointnet2_ssg_2xb16-cosine-50e_s3dis-seg.py
@@ -8,7 +8,7 @@
     backbone=dict(in_channels=9),  # [xyz, rgb, normalized_xyz]
     decode_head=dict(
         num_classes=13, ignore_index=13,
-        loss_decode=dict(class_weight=None)),  # S3DIS doesn't use class_weight
+        loss_ce=dict(class_weight=None)),  # S3DIS doesn't use class_weight
     test_cfg=dict(
         num_points=4096,
         block_size=1.0,

diff --git a/configs/spvcnn/spvcnn_w16_8xb2-amp-15e_semantickitti.py b/configs/spvcnn/spvcnn_w16_8xb2-amp-15e_semantickitti.py
@@ -1,4 +1,4 @@
-_base_ = ['./spvcnn_w32_8xb2-15e_semantickitti.py']
+_base_ = ['./spvcnn_w32_8xb2-amp-15e_semantickitti.py']
 
 model = dict(
     backbone=dict(

diff --git a/configs/spvcnn/spvcnn_w20_8xb2-amp-15e_semantickitti.py b/configs/spvcnn/spvcnn_w20_8xb2-amp-15e_semantickitti.py
@@ -1,4 +1,4 @@
-_base_ = ['./spvcnn_w32_8xb2-15e_semantickitti.py']
+_base_ = ['./spvcnn_w32_8xb2-amp-15e_semantickitti.py']
 
 model = dict(
     backbone=dict(

diff --git a/mmdet3d/configs/_base_/models/cylinder3d.py b/mmdet3d/configs/_base_/models/cylinder3d.py
@@ -1,32 +1,32 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from mmdet3d.models import Cylinder3D
 from mmdet3d.models.backbones import Asymm3DSpconv
 from mmdet3d.models.data_preprocessors import Det3DDataPreprocessor
 from mmdet3d.models.decode_heads.cylinder3d_head import Cylinder3DHead
 from mmdet3d.models.losses import LovaszLoss
+from mmdet3d.models.segmentors import VoxelSegmentor
 from mmdet3d.models.voxel_encoders import SegVFE
 
 grid_shape = [480, 360, 32]
+point_cloud_range = [0, -3.14159265359, -4, 50, 3.14159265359, 2]
 model = dict(
-    type=Cylinder3D,
+    type=VoxelSegmentor,
     data_preprocessor=dict(
         type=Det3DDataPreprocessor,
         voxel=True,
         voxel_type='cylindrical',
         voxel_layer=dict(
             grid_shape=grid_shape,
-            point_cloud_range=[0, -3.14159265359, -4, 50, 3.14159265359, 2],
+            point_cloud_range=point_cloud_range,
             max_num_points=-1,
-            max_voxels=-1,
-        ),
-    ),
+            max_voxels=-1)),
     voxel_encoder=dict(
         type=SegVFE,
-        feat_channels=[64, 128, 256, 256],
         in_channels=6,
+        feat_channels=[64, 128, 256, 256],
         with_voxel_center=True,
-        feat_compression=16,
-        return_point_feats=False),
+        grid_shape=grid_shape,
+        point_cloud_range=point_cloud_range,
+        feat_compression=16),
     backbone=dict(
         type=Asymm3DSpconv,
         grid_size=grid_shape,
@@ -37,13 +37,14 @@
         type=Cylinder3DHead,
         channels=128,
         num_classes=20,
+        dropout_ratio=0,
         loss_ce=dict(
             type='mmdet.CrossEntropyLoss',
             use_sigmoid=False,
             class_weight=None,
             loss_weight=1.0),
         loss_lovasz=dict(type=LovaszLoss, loss_weight=1.0, reduction='none'),
-    ),
+        conv_seg_kernel_size=3,
+        ignore_index=19),
     train_cfg=None,
-    test_cfg=dict(mode='whole'),
-)
+    test_cfg=dict(mode='whole'))
diff --git a/mmdet3d/configs/_base_/models/minkunet.py b/mmdet3d/configs/_base_/models/minkunet.py
@@ -3,10 +3,10 @@
 from mmdet3d.models.data_preprocessors.data_preprocessor import \
     Det3DDataPreprocessor
 from mmdet3d.models.decode_heads.minkunet_head import MinkUNetHead
-from mmdet3d.models.segmentors.minkunet import MinkUNet
+from mmdet3d.models.segmentors import VoxelSegmentor
 
 model = dict(
-    type=MinkUNet,
+    type=VoxelSegmentor,
     data_preprocessor=dict(
         type=Det3DDataPreprocessor,
         voxel=True,
@@ -34,7 +34,7 @@
         channels=96,
         num_classes=19,
         dropout_ratio=0,
-        loss_decode=dict(type='mmdet.CrossEntropyLoss', avg_non_ignore=True),
+        loss_ce=dict(type='mmdet.CrossEntropyLoss', avg_non_ignore=True),
         ignore_index=19),
     train_cfg=dict(),
     test_cfg=dict())
diff --git a/mmdet3d/configs/cylinder3d/cylinder3d_4xb4-3x_semantickitti.py b/mmdet3d/configs/cylinder3d/cylinder3d_4xb4-3x_semantickitti.py
@@ -32,7 +32,7 @@
         gamma=0.1)
 ]
 
-train_dataloader.update(dict(batch_size=4, ))
+train_dataloader.update(dict(batch_size=4))
 
 # Default setting for scaling LR automatically
 #   - `enable` means enable scaling LR automatically

diff --git a/mmdet3d/models/backbones/cylinder3d.py b/mmdet3d/models/backbones/cylinder3d.py
@@ -13,7 +13,6 @@
 from mmcv.ops import (SparseConv3d, SparseConvTensor, SparseInverseConv3d,
                       SubMConv3d)
 from mmengine.model import BaseModule
-from torch import Tensor
 
 from mmdet3d.registry import MODELS
 from mmdet3d.utils import ConfigType
@@ -457,12 +456,14 @@
             indice_key='ddcm',
             norm_cfg=norm_cfg)
 
-    def forward(self, voxel_features: Tensor, coors: Tensor,
-                batch_size: int) -> SparseConvTensor:
+    def forward(self, voxel_dict: dict) -> dict:
         """Forward pass."""
-        coors = coors.int()
-        ret = SparseConvTensor(voxel_features, coors, np.array(self.grid_size),
-                               batch_size)
+        voxel_features = voxel_dict['voxel_feats']
+        voxel_coors = voxel_dict['voxel_coors']
+        voxel_coors = voxel_coors.int()
+        batch_size = voxel_dict['coors'][-1, 0].item() + 1
+        ret = SparseConvTensor(voxel_features, voxel_coors,
+                               np.array(self.grid_size), batch_size)
         ret = self.down_context(ret)
 
         down_skip_list = []
@@ -477,5 +478,6 @@
 
         ddcm = self.ddcm(up)
         ddcm.features = torch.cat((ddcm.features, up.features), 1)
+        voxel_dict['voxel_feats'] = ddcm
 
-        return ddcm
+        return voxel_dict