From b10c39acfbaf4fbfc0563746c5dd9ceef3b7a17c Mon Sep 17 00:00:00 2001 From: JingweiZhang12 Date: Tue, 20 Jun 2023 15:09:32 +0800 Subject: [PATCH 1/7] support training dsvt --- ...ecfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py | 97 ++++- projects/DSVT/dsvt/dsvt.py | 6 +- projects/DSVT/dsvt/dsvt_head.py | 367 +++++++++++++++++- projects/DSVT/dsvt/utils.py | 141 ++++++- 4 files changed, 594 insertions(+), 17 deletions(-) diff --git a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py index ac7a38b9bc..f9288f09e3 100644 --- a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py +++ b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py @@ -88,25 +88,28 @@ loss_cls=dict( type='mmdet.GaussianFocalLoss', reduction='mean', loss_weight=1.0), loss_bbox=dict(type='mmdet.L1Loss', reduction='mean', loss_weight=2.0), + loss_iou=dict(type='mmdet.L1Loss', reduction='none', loss_weight=1.0), + loss_reg_iou=dict( + type='mmdet3d.DIoU3DLoss', reduction='mean', loss_weight=2.0), norm_bbox=True), # model training and testing settings train_cfg=dict( - pts=dict( - grid_size=grid_size, - voxel_size=voxel_size, - out_size_factor=4, - dense_reg=1, - gaussian_overlap=0.1, - max_objs=500, - min_radius=2, - code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])), + grid_size=grid_size, + voxel_size=voxel_size, + point_cloud_range=point_cloud_range, + out_size_factor=1, + dense_reg=1, + gaussian_overlap=0.1, + max_objs=500, + min_radius=2, + code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]), test_cfg=dict( max_per_img=500, max_pool_nms=False, min_radius=[4, 12, 10, 1, 0.85, 0.175], iou_rectifier=[[0.68, 0.71, 0.65]], pc_range=[-80, -80], - out_size_factor=4, + out_size_factor=1, voxel_size=voxel_size[:2], nms_type='rotate', multi_class_nms=True, @@ -149,11 +152,16 @@ # remove_close=True), dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), dict(type='ObjectSample', db_sampler=db_sampler), + dict( + type='RandomFlip3D', + sync_2d=False, + flip_ratio_bev_horizontal=0.5, + flip_ratio_bev_vertical=0.5), dict( type='GlobalRotScaleTrans', rot_range=[-0.78539816, 0.78539816], scale_ratio_range=[0.95, 1.05], - translation_std=[0.5, 0.5, 0]), + translation_std=[0.5, 0.5, 0.5]), dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), dict(type='ObjectNameFilter', classes=class_names), @@ -191,6 +199,26 @@ ] dataset_type = 'WaymoDataset' +train_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='waymo_infos_train.pkl', + data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'), + pipeline=train_pipeline, + modality=input_modality, + test_mode=False, + metainfo=metainfo, + # we use box_type_3d='LiDAR' in kitti and nuscenes dataset + # and box_type_3d='Depth' in sunrgbd and scannet dataset. + box_type_3d='LiDAR', + # load one frame every five frames + load_interval=5, + backend_args=backend_args)) val_dataloader = dict( batch_size=4, num_workers=4, @@ -223,6 +251,52 @@ vis_backends = [dict(type='LocalVisBackend')] visualizer = dict( type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer') +lr = 1e-5 +# This schedule is mainly used by models on nuScenes dataset +# max_norm=10 is better for SECOND +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='Adam', lr=lr, weight_decay=0.05, betas=(0.9, 0.999)), + clip_grad=dict(max_norm=10, norm_type=2)) +# learning rate +param_scheduler = [ + dict( + type='CosineAnnealingLR', + T_max=1.2, + eta_min=lr * 100, + begin=0, + end=1.2, + by_epoch=True, + convert_to_iter_based=True), + dict( + type='CosineAnnealingLR', + T_max=10.8, + eta_min=lr * 1e-4, + begin=1.2, + end=12, + by_epoch=True, + convert_to_iter_based=True), + # momentum scheduler + dict( + type='CosineAnnealingMomentum', + T_max=1.2, + eta_min=0.85, + begin=0, + end=1.2, + by_epoch=True, + convert_to_iter_based=True), + dict( + type='CosineAnnealingMomentum', + T_max=10.8, + eta_min=0.95, + begin=1.2, + end=12, + by_epoch=True, + convert_to_iter_based=True) +] + +# runtime settings +train_cfg = dict(by_epoch=True, max_epochs=12, val_interval=12) # runtime settings val_cfg = dict() @@ -237,3 +311,4 @@ default_hooks = dict( logger=dict(type='LoggerHook', interval=50), checkpoint=dict(type='CheckpointHook', interval=5)) +custom_hooks = [dict(type='DisableObjectSampleHook', disable_after_epoch=11)] diff --git a/projects/DSVT/dsvt/dsvt.py b/projects/DSVT/dsvt/dsvt.py index 3c5405679f..e6f6ceffbe 100644 --- a/projects/DSVT/dsvt/dsvt.py +++ b/projects/DSVT/dsvt/dsvt.py @@ -103,7 +103,11 @@ def loss(self, batch_inputs_dict: Dict[List, torch.Tensor], Returns: dict[str, Tensor]: A dictionary of loss components. """ - pass + pts_feats = self.extract_feat(batch_inputs_dict) + losses = dict() + loss = self.bbox_head.loss(pts_feats, batch_data_samples) + losses.update(loss) + return losses def predict(self, batch_inputs_dict: Dict[str, Optional[Tensor]], batch_data_samples: List[Det3DDataSample], diff --git a/projects/DSVT/dsvt/dsvt_head.py b/projects/DSVT/dsvt/dsvt_head.py index 35167919f0..2d7d8d03db 100644 --- a/projects/DSVT/dsvt/dsvt_head.py +++ b/projects/DSVT/dsvt/dsvt_head.py @@ -1,12 +1,15 @@ from typing import Dict, List, Tuple import torch +from mmcv.ops import boxes_iou3d from mmdet.models.utils import multi_apply from mmengine.structures import InstanceData from torch import Tensor from mmdet3d.models import CenterHead from mmdet3d.models.layers import circle_nms, nms_bev +from mmdet3d.models.utils import (clip_sigmoid, draw_heatmap_gaussian, + gaussian_radius) from mmdet3d.registry import MODELS from mmdet3d.structures import Det3DDataSample, xywhr2xyxyr @@ -18,8 +21,16 @@ class DSVTCenterHead(CenterHead): This head adds IoU prediction branch based on the original CenterHead. """ - def __init__(self, *args, **kwargs): + def __init__(self, + *args, + loss_iou=dict( + type='mmdet.L1Loss', reduction='mean', loss_weight=1), + loss_reg_iou=None, + **kwargs): super(DSVTCenterHead, self).__init__(*args, **kwargs) + self.loss_iou = MODELS.build(loss_iou) + self.loss_iou_reg = MODELS.build( + loss_reg_iou) if loss_reg_iou is not None else None def forward_single(self, x: Tensor) -> dict: """Forward function for CenterPoint. @@ -66,7 +77,296 @@ def loss(self, pts_feats: List[Tensor], Returns: dict: Losses of each branch. """ - pass + outs = self(pts_feats) + batch_gt_instance_3d = [] + for data_sample in batch_data_samples: + batch_gt_instance_3d.append(data_sample.gt_instances_3d) + losses = self.loss_by_feat(outs, batch_gt_instance_3d) + return losses + + def _decode_all_preds(self, + pred_dict, + point_cloud_range=None, + voxel_size=None): + batch_size, _, H, W = pred_dict['reg'].shape + + batch_center = pred_dict['reg'].permute(0, 2, 3, 1).contiguous().view( + batch_size, H * W, 2) # (B, H, W, 2) + batch_center_z = pred_dict['height'].permute( + 0, 2, 3, 1).contiguous().view(batch_size, H * W, 1) # (B, H, W, 1) + batch_dim = pred_dict['dim'].exp().permute( + 0, 2, 3, 1).contiguous().view(batch_size, H * W, 3) # (B, H, W, 3) + batch_rot_cos = pred_dict['rot'][:, 0].unsqueeze(dim=1).permute( + 0, 2, 3, 1).contiguous().view(batch_size, H * W, 1) # (B, H, W, 1) + batch_rot_sin = pred_dict['rot'][:, 1].unsqueeze(dim=1).permute( + 0, 2, 3, 1).contiguous().view(batch_size, H * W, 1) # (B, H, W, 1) + batch_vel = pred_dict['vel'].permute(0, 2, 3, 1).contiguous().view( + batch_size, H * W, 2) if 'vel' in pred_dict.keys() else None + + angle = torch.atan2(batch_rot_sin, batch_rot_cos) # (B, H*W, 1) + + ys, xs = torch.meshgrid([ + torch.arange( + 0, H, device=batch_center.device, dtype=batch_center.dtype), + torch.arange( + 0, W, device=batch_center.device, dtype=batch_center.dtype) + ]) + ys = ys.view(1, H, W).repeat(batch_size, 1, 1) + xs = xs.view(1, H, W).repeat(batch_size, 1, 1) + xs = xs.view(batch_size, -1, 1) + batch_center[:, :, 0:1] + ys = ys.view(batch_size, -1, 1) + batch_center[:, :, 1:2] + + xs = xs * voxel_size[0] + point_cloud_range[0] + ys = ys * voxel_size[1] + point_cloud_range[1] + + box_part_list = [xs, ys, batch_center_z, batch_dim, angle] + if batch_vel is not None: + box_part_list.append(batch_vel) + + box_preds = torch.cat((box_part_list), + dim=-1).view(batch_size, H, W, -1) + + return box_preds + + def _transpose_and_gather_feat(self, feat, ind): + feat = feat.permute(0, 2, 3, 1).contiguous() + feat = feat.view(feat.size(0), -1, feat.size(3)) + feat = self._gather_feat(feat, ind) + return feat + + def calc_iou_loss(self, iou_preds, batch_box_preds, mask, ind, gt_boxes): + """ + Args: + iou_preds: (batch x 1 x h x w) + batch_box_preds: (batch x (7 or 9) x h x w) + mask: (batch x max_objects) + ind: (batch x max_objects) + gt_boxes: List of batch groundtruth boxes. + + Returns: + """ + if mask.sum() == 0: + return iou_preds.new_zeros((1)) + + mask = mask.bool() + selected_iou_preds = self._transpose_and_gather_feat(iou_preds, + ind)[mask] + + selected_box_preds = self._transpose_and_gather_feat( + batch_box_preds, ind)[mask] + gt_boxes = torch.cat(gt_boxes) + assert gt_boxes.size(0) == selected_box_preds.size(0) + iou_target = boxes_iou3d(selected_box_preds[:, 0:7], gt_boxes[:, 0:7]) + iou_target = torch.diag(iou_target).view(-1) + iou_target = iou_target * 2 - 1 # [0, 1] ==> [-1, 1] + + loss = self.loss_iou(selected_iou_preds.view(-1), iou_target) + return loss + + def calc_iou_reg_loss(self, batch_box_preds, mask, ind, gt_boxes): + if mask.sum() == 0: + return batch_box_preds.new_zeros((1)) + + mask = mask.bool() + + selected_box_preds = self._transpose_and_gather_feat( + batch_box_preds, ind)[mask] + gt_boxes = torch.cat(gt_boxes) + assert gt_boxes.size(0) == selected_box_preds.size(0) + loss = self.loss_iou_reg(selected_box_preds[:, 0:7], gt_boxes[:, 0:7]) + + return loss + + def get_targets( + self, + batch_gt_instances_3d: List[InstanceData], + ) -> Tuple[List[Tensor]]: + """Generate targets. + + How each output is transformed: + + Each nested list is transposed so that all same-index elements in + each sub-list (1, ..., N) become the new sub-lists. + [ [a0, a1, a2, ... ], [b0, b1, b2, ... ], ... ] + ==> [ [a0, b0, ... ], [a1, b1, ... ], [a2, b2, ... ] ] + + The new transposed nested list is converted into a list of N + tensors generated by concatenating tensors in the new sub-lists. + [ tensor0, tensor1, tensor2, ... ] + + Args: + batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of + gt_instances. It usually includes ``bboxes_3d`` and\ + ``labels_3d`` attributes. + + Returns: + Returns: + tuple[list[torch.Tensor]]: Tuple of target including + the following results in order. + + - list[torch.Tensor]: Heatmap scores. + - list[torch.Tensor]: Ground truth boxes. + - list[torch.Tensor]: Indexes indicating the + position of the valid boxes. + - list[torch.Tensor]: Masks indicating which + boxes are valid. + """ + heatmaps, anno_boxes, inds, masks, task_gt_bboxes = multi_apply( + self.get_targets_single, batch_gt_instances_3d) + # Transpose heatmaps + heatmaps = list(map(list, zip(*heatmaps))) + heatmaps = [torch.stack(hms_) for hms_ in heatmaps] + # Transpose anno_boxes + anno_boxes = list(map(list, zip(*anno_boxes))) + anno_boxes = [torch.stack(anno_boxes_) for anno_boxes_ in anno_boxes] + # Transpose inds + inds = list(map(list, zip(*inds))) + inds = [torch.stack(inds_) for inds_ in inds] + # Transpose inds + masks = list(map(list, zip(*masks))) + masks = [torch.stack(masks_) for masks_ in masks] + # Transpose inds + task_gt_bboxes = list(map(list, zip(*task_gt_bboxes))) + return heatmaps, anno_boxes, inds, masks, task_gt_bboxes + + def get_targets_single(self, + gt_instances_3d: InstanceData) -> Tuple[Tensor]: + """Generate training targets for a single sample. + + Args: + gt_instances_3d (:obj:`InstanceData`): Gt_instances of + single data sample. It usually includes + ``bboxes_3d`` and ``labels_3d`` attributes. + + Returns: + tuple[list[torch.Tensor]]: Tuple of target including + the following results in order. + + - list[torch.Tensor]: Heatmap scores. + - list[torch.Tensor]: Ground truth boxes. + - list[torch.Tensor]: Indexes indicating the position + of the valid boxes. + - list[torch.Tensor]: Masks indicating which boxes + are valid. + """ + gt_labels_3d = gt_instances_3d.labels_3d + gt_bboxes_3d = gt_instances_3d.bboxes_3d + device = gt_labels_3d.device + gt_bboxes_3d = torch.cat( + (gt_bboxes_3d.gravity_center, gt_bboxes_3d.tensor[:, 3:]), + dim=1).to(device) + max_objs = self.train_cfg['max_objs'] * self.train_cfg['dense_reg'] + grid_size = torch.tensor(self.train_cfg['grid_size']).to(device) + pc_range = torch.tensor(self.train_cfg['point_cloud_range']) + voxel_size = torch.tensor(self.train_cfg['voxel_size']) + + feature_map_size = grid_size[:2] // self.train_cfg['out_size_factor'] + + # reorganize the gt_dict by tasks + task_masks = [] + flag = 0 + for class_name in self.class_names: + task_masks.append([ + torch.where(gt_labels_3d == class_name.index(i) + flag) + for i in class_name + ]) + flag += len(class_name) + + task_boxes = [] + task_classes = [] + flag2 = 0 + for idx, mask in enumerate(task_masks): + task_box = [] + task_class = [] + for m in mask: + task_box.append(gt_bboxes_3d[m]) + # 0 is background for each task, so we need to add 1 here. + task_class.append(gt_labels_3d[m] + 1 - flag2) + task_boxes.append(torch.cat(task_box, axis=0).to(device)) + task_classes.append(torch.cat(task_class).long().to(device)) + flag2 += len(mask) + draw_gaussian = draw_heatmap_gaussian + heatmaps, anno_boxes, inds, masks = [], [], [], [] + + for idx, task_head in enumerate(self.task_heads): + heatmap = gt_bboxes_3d.new_zeros( + (len(self.class_names[idx]), feature_map_size[1], + feature_map_size[0])) + + anno_box = gt_bboxes_3d.new_zeros((max_objs, 8), + dtype=torch.float32) + + ind = gt_labels_3d.new_zeros((max_objs), dtype=torch.int64) + mask = gt_bboxes_3d.new_zeros((max_objs), dtype=torch.uint8) + + num_objs = min(task_boxes[idx].shape[0], max_objs) + + for k in range(num_objs): + cls_id = task_classes[idx][k] - 1 + + length = task_boxes[idx][k][3] + width = task_boxes[idx][k][4] + length = length / voxel_size[0] / self.train_cfg[ + 'out_size_factor'] + width = width / voxel_size[1] / self.train_cfg[ + 'out_size_factor'] + + if width > 0 and length > 0: + radius = gaussian_radius( + (width, length), + min_overlap=self.train_cfg['gaussian_overlap']) + radius = max(self.train_cfg['min_radius'], int(radius)) + + # be really careful for the coordinate system of + # your box annotation. + x, y, z = task_boxes[idx][k][0], task_boxes[idx][k][ + 1], task_boxes[idx][k][2] + + coor_x = ( + x - pc_range[0] + ) / voxel_size[0] / self.train_cfg['out_size_factor'] + coor_y = ( + y - pc_range[1] + ) / voxel_size[1] / self.train_cfg['out_size_factor'] + + center = torch.tensor([coor_x, coor_y], + dtype=torch.float32, + device=device) + center_int = center.to(torch.int32) + + # throw out not in range objects to avoid out of array + # area when creating the heatmap + if not (0 <= center_int[0] < feature_map_size[0] + and 0 <= center_int[1] < feature_map_size[1]): + continue + + draw_gaussian(heatmap[cls_id], center_int, radius) + + new_idx = k + x, y = center_int[0], center_int[1] + + assert (y * feature_map_size[0] + x < + feature_map_size[0] * feature_map_size[1]) + + ind[new_idx] = y * feature_map_size[0] + x + mask[new_idx] = 1 + # TODO: support other outdoor dataset + rot = task_boxes[idx][k][6] + box_dim = task_boxes[idx][k][3:6] + if self.norm_bbox: + box_dim = box_dim.log() + anno_box[new_idx] = torch.cat([ + center - torch.tensor([x, y], device=device), + z.unsqueeze(0), box_dim, + torch.sin(rot).unsqueeze(0), + torch.cos(rot).unsqueeze(0) + ]) + + heatmaps.append(heatmap) + anno_boxes.append(anno_box) + masks.append(mask) + inds.append(ind) + return heatmaps, anno_boxes, inds, masks, task_boxes def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: List[InstanceData], *args, @@ -79,13 +379,72 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], tasks head, and the internal list indicate different FPN level. batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of - gt_instances. It usually includes ``bboxes_3d`` and\ + gt_instances. It usually includes ``bboxes_3d`` and ``labels_3d`` attributes. Returns: dict[str,torch.Tensor]: Loss of heatmap and bbox of each task. """ - pass + heatmaps, anno_boxes, inds, masks, task_gt_bboxes = self.get_targets( + batch_gt_instances_3d) + loss_dict = dict() + for task_id, preds_dict in enumerate(preds_dicts): + # heatmap focal loss + preds_dict[0]['heatmap'] = clip_sigmoid(preds_dict[0]['heatmap']) + num_pos = heatmaps[task_id].eq(1).float().sum().item() + loss_heatmap = self.loss_cls( + preds_dict[0]['heatmap'], + heatmaps[task_id], + avg_factor=max(num_pos, 1)) + target_box = anno_boxes[task_id] + # reconstruct the anno_box from multiple reg heads + preds_dict[0]['anno_box'] = torch.cat( + (preds_dict[0]['reg'], preds_dict[0]['height'], + preds_dict[0]['dim'], preds_dict[0]['rot']), + dim=1) + + # Regression loss for dimension, offset, height, rotation + ind = inds[task_id] + num = masks[task_id].float().sum() + pred = preds_dict[0]['anno_box'].permute(0, 2, 3, 1).contiguous() + pred = pred.view(pred.size(0), -1, pred.size(3)) + pred = self._gather_feat(pred, ind) + mask = masks[task_id].unsqueeze(2).expand_as(target_box).float() + isnotnan = (~torch.isnan(target_box)).float() + mask *= isnotnan + + code_weights = self.train_cfg.get('code_weights', None) + bbox_weights = mask * mask.new_tensor(code_weights) + loss_bbox = self.loss_bbox( + pred, target_box, bbox_weights, avg_factor=(num + 1e-4)) + loss_dict[f'task{task_id}.loss_heatmap'] = loss_heatmap + loss_dict[f'task{task_id}.loss_bbox'] = loss_bbox + + if 'iou' in preds_dict[0]: + batch_box_preds = self._decode_all_preds( + pred_dict=preds_dict[0], + point_cloud_range=self.train_cfg['point_cloud_range'], + voxel_size=self.train_cfg['voxel_size'] + ) # (B, H, W, 7 or 9) + + batch_box_preds_for_iou = batch_box_preds.permute( + 0, 3, 1, 2) # (B, 7 or 9, H, W) + loss_dict[f'task{task_id}.loss_iou'] = self.calc_iou_loss( + iou_preds=preds_dict[0]['iou'], + batch_box_preds=batch_box_preds_for_iou.clone().detach(), + mask=mask.all(dim=-1), + ind=ind, + gt_boxes=task_gt_bboxes[task_id]) + + if self.loss_iou_reg is not None: + loss_dict[f'task{task_id}.loss_reg_iou'] = \ + self.calc_iou_reg_loss( + batch_box_preds=batch_box_preds_for_iou, + mask=mask.all(dim=-1), + ind=ind, + gt_boxes=task_gt_bboxes[task_id]) + + return loss_dict def predict(self, pts_feats: Tuple[torch.Tensor], diff --git a/projects/DSVT/dsvt/utils.py b/projects/DSVT/dsvt/utils.py index 7c40383ce7..4d697d2dbb 100644 --- a/projects/DSVT/dsvt/utils.py +++ b/projects/DSVT/dsvt/utils.py @@ -3,10 +3,11 @@ import numpy as np import torch import torch.nn as nn +from mmdet.models.losses.utils import weighted_loss from torch import Tensor from mmdet3d.models.task_modules import CenterPointBBoxCoder -from mmdet3d.registry import TASK_UTILS +from mmdet3d.registry import MODELS, TASK_UTILS from .ops.ingroup_inds.ingroup_inds_op import ingroup_inds get_inner_win_inds_cuda = ingroup_inds @@ -298,3 +299,141 @@ def decode(self, 'support post_center_range is not None for now!') return predictions_dicts + + +def center_to_corner2d(center, dim): + corners_norm = torch.tensor( + [[-0.5, -0.5], [-0.5, 0.5], [0.5, 0.5], [0.5, -0.5]], + device=dim.device).type_as(center) # (4, 2) + corners = dim.view([-1, 1, 2]) * corners_norm.view([1, 4, 2]) # (N, 4, 2) + corners = corners + center.view(-1, 1, 2) + return corners + + +@weighted_loss +def diou3d_loss(pred_boxes, gt_boxes, eps: float = 1e-7): + """ + https://github.com/agent-sgs/PillarNet/blob/master/det3d/core/utils/center_utils.py # noqa + Args: + pred_boxes (N, 7): + gt_boxes (N, 7): + + Returns: + _type_: _description_ + """ + assert pred_boxes.shape[0] == gt_boxes.shape[0] + + qcorners = center_to_corner2d(pred_boxes[:, :2], + pred_boxes[:, 3:5]) # (N, 4, 2) + gcorners = center_to_corner2d(gt_boxes[:, :2], gt_boxes[:, + 3:5]) # (N, 4, 2) + + inter_max_xy = torch.minimum(qcorners[:, 2], gcorners[:, 2]) + inter_min_xy = torch.maximum(qcorners[:, 0], gcorners[:, 0]) + out_max_xy = torch.maximum(qcorners[:, 2], gcorners[:, 2]) + out_min_xy = torch.minimum(qcorners[:, 0], gcorners[:, 0]) + + # calculate area + volume_pred_boxes = pred_boxes[:, 3] * pred_boxes[:, 4] * pred_boxes[:, 5] + volume_gt_boxes = gt_boxes[:, 3] * gt_boxes[:, 4] * gt_boxes[:, 5] + + inter_h = torch.minimum( + pred_boxes[:, 2] + 0.5 * pred_boxes[:, 5], + gt_boxes[:, 2] + 0.5 * gt_boxes[:, 5]) - torch.maximum( + pred_boxes[:, 2] - 0.5 * pred_boxes[:, 5], + gt_boxes[:, 2] - 0.5 * gt_boxes[:, 5]) + inter_h = torch.clamp(inter_h, min=0) + + inter = torch.clamp((inter_max_xy - inter_min_xy), min=0) + volume_inter = inter[:, 0] * inter[:, 1] * inter_h + volume_union = volume_gt_boxes + volume_pred_boxes - volume_inter + eps + + # boxes_iou3d_gpu(pred_boxes, gt_boxes) + inter_diag = torch.pow(gt_boxes[:, 0:3] - pred_boxes[:, 0:3], 2).sum(-1) + + outer_h = torch.maximum( + gt_boxes[:, 2] + 0.5 * gt_boxes[:, 5], + pred_boxes[:, 2] + 0.5 * pred_boxes[:, 5]) - torch.minimum( + gt_boxes[:, 2] - 0.5 * gt_boxes[:, 5], + pred_boxes[:, 2] - 0.5 * pred_boxes[:, 5]) + outer_h = torch.clamp(outer_h, min=0) + outer = torch.clamp((out_max_xy - out_min_xy), min=0) + outer_diag = outer[:, 0]**2 + outer[:, 1]**2 + outer_h**2 + eps + + dious = volume_inter / volume_union - inter_diag / outer_diag + dious = torch.clamp(dious, min=-1.0, max=1.0) + + loss = 1 - dious + + return loss + + +@MODELS.register_module() +class DIoU3DLoss(nn.Module): + r"""3D bboxes Implementation of `Distance-IoU Loss: Faster and Better + Learning for Bounding Box Regression https://arxiv.org/abs/1911.08287`_. + + Code is modified from https://github.com/Zzh-tju/DIoU. + + Args: + eps (float): Epsilon to avoid log(0). + reduction (str): Options are "none", "mean" and "sum". + loss_weight (float): Weight of loss. + """ + + def __init__(self, + eps: float = 1e-6, + reduction: str = 'mean', + loss_weight: float = 1.0) -> None: + super().__init__() + self.eps = eps + self.reduction = reduction + self.loss_weight = loss_weight + + def forward(self, + pred: Tensor, + target: Tensor, + weight: Optional[Tensor] = None, + avg_factor: Optional[int] = None, + reduction_override: Optional[str] = None, + **kwargs) -> Tensor: + """Forward function. + + Args: + pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2), + shape (n, 4). + target (Tensor): The learning target of the prediction, + shape (n, 4). + weight (Optional[Tensor], optional): The weight of loss for each + prediction. Defaults to None. + avg_factor (Optional[int], optional): Average factor that is used + to average the loss. Defaults to None. + reduction_override (Optional[str], optional): The reduction method + used to override the original reduction method of the loss. + Defaults to None. Options are "none", "mean" and "sum". + + Returns: + Tensor: Loss tensor. + """ + if weight is not None and not torch.any(weight > 0): + if pred.dim() == weight.dim() + 1: + weight = weight.unsqueeze(1) + return (pred * weight).sum() # 0 + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if weight is not None and weight.dim() > 1: + # TODO: remove this in the future + # reduce the weight of shape (n, 4) to (n,) to match the + # giou_loss of shape (n,) + assert weight.shape == pred.shape + weight = weight.mean(-1) + loss = self.loss_weight * diou3d_loss( + pred, + target, + weight, + eps=self.eps, + reduction=reduction, + avg_factor=avg_factor, + **kwargs) + return loss From 7fecc0f2e5e98507a4c6486cff652ea2fe01541b Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Wed, 9 Aug 2023 13:33:27 +0800 Subject: [PATCH 2/7] fix batch_size --- convert_ckpt.py | 34 +++++++++++++++++++ mmdet3d/engine/hooks/visualization_hook.py | 4 +-- mmdet3d/structures/points/base_points.py | 6 ++-- ...ecfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py | 20 ++--------- 4 files changed, 42 insertions(+), 22 deletions(-) create mode 100644 convert_ckpt.py diff --git a/convert_ckpt.py b/convert_ckpt.py new file mode 100644 index 0000000000..f5c3c9317e --- /dev/null +++ b/convert_ckpt.py @@ -0,0 +1,34 @@ +# from mmengine.config import Config +# cfg = Config.fromfile(path) +# print(cfg) +import torch + +mm3d_model = torch.load('checkpoints/dsvt_convert.pth') +dsvt_model = dict() +dsvt_model['model_state'] = dict() +for k, v in mm3d_model.items(): + if 'voxel_encoder' in k: + k = k.replace('voxel_encoder', 'vfe') + if 'middle_encoder' in k: + k = k.replace('middle_encoder', 'backbone_3d') + if 'backbone.' in k: + k = k.replace('backbone', 'backbone_2d') + if 'neck' in k: + k = k.replace('neck', 'backbone_2d') + if 'bbox_head.shared_conv' in k: + k = k.replace('bbox_head.shared_conv.conv', 'dense_head.shared_conv.0') + k = k.replace('bbox_head.shared_conv.bn', 'dense_head.shared_conv.1') + if 'bbox_head.task_heads' in k: + k = k.replace('bbox_head.task_heads', 'dense_head.heads_list') + if 'reg' in k: + k = k.replace('reg', 'center') + if 'height' in k: + k = k.replace('height', 'center_z') + if 'heatmap' in k: + k = k.replace('heatmap', 'hm') + if '0.conv' in k: + k = k.replace('0.conv', '0.0') + if '0.bn' in k: + k = k.replace('0.bn', '0.1') + dsvt_model['model_state'][k] = v +torch.save(dsvt_model, 'dsvt_ckpt.pth') diff --git a/mmdet3d/engine/hooks/visualization_hook.py b/mmdet3d/engine/hooks/visualization_hook.py index ffec1addc3..9de46d9692 100644 --- a/mmdet3d/engine/hooks/visualization_hook.py +++ b/mmdet3d/engine/hooks/visualization_hook.py @@ -78,11 +78,11 @@ def __init__(self, 'needs to be excluded.') self.vis_task = vis_task - if wait_time == -1: + if show and wait_time == -1: print_log( 'Manual control mode, press [Right] to next sample.', logger='current') - else: + elif show: print_log( 'Autoplay mode, press [SPACE] to pause.', logger='current') self.wait_time = wait_time diff --git a/mmdet3d/structures/points/base_points.py b/mmdet3d/structures/points/base_points.py index 4cb54ce895..188d20d270 100644 --- a/mmdet3d/structures/points/base_points.py +++ b/mmdet3d/structures/points/base_points.py @@ -247,10 +247,10 @@ def in_range_3d( """ in_range_flags = ((self.tensor[:, 0] > point_range[0]) & (self.tensor[:, 1] > point_range[1]) - & (self.tensor[:, 2] > point_range[2]) + # & (self.tensor[:, 2] > point_range[2]) & (self.tensor[:, 0] < point_range[3]) - & (self.tensor[:, 1] < point_range[4]) - & (self.tensor[:, 2] < point_range[5])) + & (self.tensor[:, 1] < point_range[4])) + # & (self.tensor[:, 2] < point_range[5])) return in_range_flags @property diff --git a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py index f9288f09e3..0ac5c66c48 100644 --- a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py +++ b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py @@ -180,21 +180,7 @@ norm_intensity=True, norm_elongation=True, backend_args=backend_args), - dict( - type='MultiScaleFlipAug3D', - img_scale=(1333, 800), - pts_scale_ratio=1, - flip=False, - transforms=[ - dict( - type='GlobalRotScaleTrans', - rot_range=[0, 0], - scale_ratio_range=[1., 1.], - translation_std=[0, 0, 0]), - dict(type='RandomFlip3D'), - dict( - type='PointsRangeFilter', point_cloud_range=point_cloud_range) - ]), + dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='Pack3DDetInputs', keys=['points']) ] @@ -220,8 +206,8 @@ load_interval=5, backend_args=backend_args)) val_dataloader = dict( - batch_size=4, - num_workers=4, + batch_size=1, + num_workers=1, persistent_workers=True, drop_last=False, sampler=dict(type='DefaultSampler', shuffle=False), From 83426854d89edcc08db42735a1068015145429df Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Tue, 12 Sep 2023 11:39:27 +0800 Subject: [PATCH 3/7] fix train iter --- convert_ckpt.py | 34 ----- mmdet3d/datasets/det3d_dataset.py | 4 +- mmdet3d/datasets/waymo_dataset.py | 58 ++++++++- .../models/dense_heads/centerpoint_head.py | 2 +- mmdet3d/structures/bbox_3d/base_box3d.py | 13 +- ...ecfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py | 30 ++--- projects/DSVT/dsvt/__init__.py | 4 +- projects/DSVT/dsvt/dsvt_head.py | 14 ++- projects/DSVT/dsvt/dynamic_pillar_vfe.py | 3 + projects/DSVT/dsvt/transforms_3d.py | 116 ++++++++++++++++++ tools/train.py | 10 ++ 11 files changed, 219 insertions(+), 69 deletions(-) delete mode 100644 convert_ckpt.py create mode 100644 projects/DSVT/dsvt/transforms_3d.py diff --git a/convert_ckpt.py b/convert_ckpt.py deleted file mode 100644 index f5c3c9317e..0000000000 --- a/convert_ckpt.py +++ /dev/null @@ -1,34 +0,0 @@ -# from mmengine.config import Config -# cfg = Config.fromfile(path) -# print(cfg) -import torch - -mm3d_model = torch.load('checkpoints/dsvt_convert.pth') -dsvt_model = dict() -dsvt_model['model_state'] = dict() -for k, v in mm3d_model.items(): - if 'voxel_encoder' in k: - k = k.replace('voxel_encoder', 'vfe') - if 'middle_encoder' in k: - k = k.replace('middle_encoder', 'backbone_3d') - if 'backbone.' in k: - k = k.replace('backbone', 'backbone_2d') - if 'neck' in k: - k = k.replace('neck', 'backbone_2d') - if 'bbox_head.shared_conv' in k: - k = k.replace('bbox_head.shared_conv.conv', 'dense_head.shared_conv.0') - k = k.replace('bbox_head.shared_conv.bn', 'dense_head.shared_conv.1') - if 'bbox_head.task_heads' in k: - k = k.replace('bbox_head.task_heads', 'dense_head.heads_list') - if 'reg' in k: - k = k.replace('reg', 'center') - if 'height' in k: - k = k.replace('height', 'center_z') - if 'heatmap' in k: - k = k.replace('heatmap', 'hm') - if '0.conv' in k: - k = k.replace('0.conv', '0.0') - if '0.bn' in k: - k = k.replace('0.bn', '0.1') - dsvt_model['model_state'][k] = v -torch.save(dsvt_model, 'dsvt_ckpt.pth') diff --git a/mmdet3d/datasets/det3d_dataset.py b/mmdet3d/datasets/det3d_dataset.py index 8e1570e794..c701a893fd 100644 --- a/mmdet3d/datasets/det3d_dataset.py +++ b/mmdet3d/datasets/det3d_dataset.py @@ -143,7 +143,9 @@ def __init__(self, # show statistics of this dataset print_log('-' * 30, 'current') - print_log(f'The length of the dataset: {len(self)}', 'current') + print_log( + f'The length of {"test" if self.test_mode else "training"} dataset: {len(self)}', # noqa: E501 + 'current') content_show = [['category', 'number']] for label, num in enumerate(self.num_ins_per_cat): cat_name = self.metainfo['classes'][label] diff --git a/mmdet3d/datasets/waymo_dataset.py b/mmdet3d/datasets/waymo_dataset.py index f5a35f5af0..58093a355d 100644 --- a/mmdet3d/datasets/waymo_dataset.py +++ b/mmdet3d/datasets/waymo_dataset.py @@ -2,7 +2,10 @@ import os.path as osp from typing import Callable, List, Union +import mmengine import numpy as np +from mmengine import print_log +from mmengine.fileio import load from mmdet3d.registry import DATASETS from mmdet3d.structures import LiDARInstance3DBoxes @@ -176,9 +179,58 @@ def parse_ann_info(self, info: dict) -> dict: return anns_results def load_data_list(self) -> List[dict]: - """Add the load interval.""" - data_list = super().load_data_list() - data_list = data_list[::self.load_interval] + """Add the load interval. + + Returns: + list[dict]: A list of annotation. + """ # noqa: E501 + # `self.ann_file` denotes the absolute annotation file path if + # `self.root=None` or relative path if `self.root=/path/to/data/`. + annotations = load(self.ann_file) + if not isinstance(annotations, dict): + raise TypeError(f'The annotations loaded from annotation file ' + f'should be a dict, but got {type(annotations)}!') + if 'data_list' not in annotations or 'metainfo' not in annotations: + raise ValueError('Annotation must have data_list and metainfo ' + 'keys') + metainfo = annotations['metainfo'] + raw_data_list = annotations['data_list'] + raw_data_list = raw_data_list[::self.load_interval] + if self.load_interval > 1: + print_log( + f'Sample size will be reduced to 1/{self.load_interval} of' + 'the original data sample', + logger='current') + + # Meta information load from annotation file will not influence the + # existed meta information load from `BaseDataset.METAINFO` and + # `metainfo` arguments defined in constructor. + for k, v in metainfo.items(): + self._metainfo.setdefault(k, v) + + # load and parse data_infos. + data_list = [] + for raw_data_info in mmengine.track_iter_progress(raw_data_list): + # parse raw data information to target format + data_info = self.parse_data_info(raw_data_info) + if isinstance(data_info, dict): + # For image tasks, `data_info` should information if single + # image, such as dict(img_path='xxx', width=360, ...) + data_list.append(data_info) + elif isinstance(data_info, list): + # For video tasks, `data_info` could contain image + # information of multiple frames, such as + # [dict(video_path='xxx', timestamps=...), + # dict(video_path='xxx', timestamps=...)] + for item in data_info: + if not isinstance(item, dict): + raise TypeError('data_info must be list of dict, but ' + f'got {type(item)}') + data_list.extend(data_info) + else: + raise TypeError('data_info should be a dict or list of dict, ' + f'but got {type(data_info)}') + return data_list def parse_data_info(self, info: dict) -> Union[dict, List[dict]]: diff --git a/mmdet3d/models/dense_heads/centerpoint_head.py b/mmdet3d/models/dense_heads/centerpoint_head.py index 12ba84234e..301db0eee0 100644 --- a/mmdet3d/models/dense_heads/centerpoint_head.py +++ b/mmdet3d/models/dense_heads/centerpoint_head.py @@ -101,7 +101,7 @@ def forward(self, x): Returns: dict[str: torch.Tensor]: contains the following keys: - -reg (torch.Tensor): 2D regression value with the + -reg (torch.Tensor): 2D regression value with the shape of [B, 2, H, W]. -height (torch.Tensor): Height value with the shape of [B, 1, H, W]. diff --git a/mmdet3d/structures/bbox_3d/base_box3d.py b/mmdet3d/structures/bbox_3d/base_box3d.py index 50b092c06e..7fb703c731 100644 --- a/mmdet3d/structures/bbox_3d/base_box3d.py +++ b/mmdet3d/structures/bbox_3d/base_box3d.py @@ -275,12 +275,13 @@ def in_range_3d( Tensor: A binary vector indicating whether each point is inside the reference range. """ - in_range_flags = ((self.tensor[:, 0] > box_range[0]) - & (self.tensor[:, 1] > box_range[1]) - & (self.tensor[:, 2] > box_range[2]) - & (self.tensor[:, 0] < box_range[3]) - & (self.tensor[:, 1] < box_range[4]) - & (self.tensor[:, 2] < box_range[5])) + gravity_center = self.gravity_center + in_range_flags = ((gravity_center[:, 0] > box_range[0]) + & (gravity_center[:, 1] > box_range[1]) + & (gravity_center[:, 2] > box_range[2]) + & (gravity_center[:, 0] < box_range[3]) + & (gravity_center[:, 1] < box_range[4]) + & (gravity_center[:, 2] < box_range[5])) return in_range_flags @abstractmethod diff --git a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py index abcb15bf26..8429a2dc6c 100644 --- a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py +++ b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py @@ -2,6 +2,7 @@ custom_imports = dict( imports=['projects.DSVT.dsvt'], allow_failed_imports=False) +# load_from = 'checkpoints/dsvt_init_mm3d.pth' voxel_size = [0.32, 0.32, 6] grid_size = [468, 468, 1] point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4.0] @@ -89,7 +90,7 @@ loss_cls=dict( type='mmdet.GaussianFocalLoss', reduction='mean', loss_weight=1.0), loss_bbox=dict(type='mmdet.L1Loss', reduction='mean', loss_weight=2.0), - loss_iou=dict(type='mmdet.L1Loss', reduction='none', loss_weight=1.0), + loss_iou=dict(type='mmdet.L1Loss', reduction='sum', loss_weight=1.0), loss_reg_iou=dict( type='mmdet3d.DIoU3DLoss', reduction='mean', loss_weight=2.0), norm_bbox=True), @@ -163,9 +164,9 @@ rot_range=[-0.78539816, 0.78539816], scale_ratio_range=[0.95, 1.05], translation_std=[0.5, 0.5, 0.5]), - dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), - dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), - dict(type='ObjectNameFilter', classes=class_names), + dict(type='DSVTPointsRangeFilter', point_cloud_range=point_cloud_range), + dict(type='DSVTObjectRangeFilter', point_cloud_range=point_cloud_range), + # dict(type='ObjectNameFilter', classes=class_names), dict(type='PointShuffle'), dict( type='Pack3DDetInputs', @@ -181,8 +182,11 @@ norm_intensity=True, norm_elongation=True, backend_args=backend_args), - dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), - dict(type='Pack3DDetInputs', keys=['points']) + dict(type='DSVTPointsRangeFilter', point_cloud_range=point_cloud_range), + dict( + type='Pack3DDetInputs', + keys=['points'], + meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp']) ] dataset_type = 'WaymoDataset' @@ -237,17 +241,9 @@ waymo_bin_file='./data/waymo/waymo_format/gt.bin', backend_args=backend_args, convert_kitti_format=False) -# val_evaluator = dict( -# type='WaymoMetric', -# ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl', -# waymo_bin_file='./data/waymo/waymo_format/gt.bin', -# data_root='./data/waymo/waymo_format', -# backend_args=backend_args, -# convert_kitti_format=False, -# idx2metainfo='./data/waymo/waymo_format/idx2metainfo.pkl') test_evaluator = val_evaluator -vis_backends = [dict(type='LocalVisBackend')] +vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')] visualizer = dict( type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer') lr = 1e-5 @@ -295,7 +291,7 @@ ] # runtime settings -train_cfg = dict(by_epoch=True, max_epochs=12, val_interval=12) +train_cfg = dict(by_epoch=True, max_epochs=12, val_interval=1) # runtime settings val_cfg = dict() @@ -309,5 +305,5 @@ default_hooks = dict( logger=dict(type='LoggerHook', interval=50), - checkpoint=dict(type='CheckpointHook', interval=5)) + checkpoint=dict(type='CheckpointHook', interval=1)) custom_hooks = [dict(type='DisableObjectSampleHook', disable_after_epoch=11)] diff --git a/projects/DSVT/dsvt/__init__.py b/projects/DSVT/dsvt/__init__.py index 4fff505fb3..ed602d6a8a 100644 --- a/projects/DSVT/dsvt/__init__.py +++ b/projects/DSVT/dsvt/__init__.py @@ -4,9 +4,11 @@ from .dynamic_pillar_vfe import DynamicPillarVFE3D from .map2bev import PointPillarsScatter3D from .res_second import ResSECOND +from .transforms_3d import DSVTObjectRangeFilter, DSVTPointsRangeFilter from .utils import DSVTBBoxCoder __all__ = [ 'DSVTCenterHead', 'DSVT', 'DSVTMiddleEncoder', 'DynamicPillarVFE3D', - 'PointPillarsScatter3D', 'ResSECOND', 'DSVTBBoxCoder' + 'PointPillarsScatter3D', 'ResSECOND', 'DSVTBBoxCoder', + 'DSVTObjectRangeFilter', 'DSVTPointsRangeFilter' ] diff --git a/projects/DSVT/dsvt/dsvt_head.py b/projects/DSVT/dsvt/dsvt_head.py index 2d7d8d03db..e6ff075305 100644 --- a/projects/DSVT/dsvt/dsvt_head.py +++ b/projects/DSVT/dsvt/dsvt_head.py @@ -161,6 +161,7 @@ def calc_iou_loss(self, iou_preds, batch_box_preds, mask, ind, gt_boxes): iou_target = iou_target * 2 - 1 # [0, 1] ==> [-1, 1] loss = self.loss_iou(selected_iou_preds.view(-1), iou_target) + loss = loss / torch.clamp(mask.sum(), min=1e-4) return loss def calc_iou_reg_loss(self, batch_box_preds, mask, ind, gt_boxes): @@ -222,10 +223,10 @@ def get_targets( # Transpose inds inds = list(map(list, zip(*inds))) inds = [torch.stack(inds_) for inds_ in inds] - # Transpose inds + # Transpose masks masks = list(map(list, zip(*masks))) masks = [torch.stack(masks_) for masks_ in masks] - # Transpose inds + # Transpose task_gt_bboxes task_gt_bboxes = list(map(list, zip(*task_gt_bboxes))) return heatmaps, anno_boxes, inds, masks, task_gt_bboxes @@ -358,8 +359,8 @@ def get_targets_single(self, anno_box[new_idx] = torch.cat([ center - torch.tensor([x, y], device=device), z.unsqueeze(0), box_dim, - torch.sin(rot).unsqueeze(0), - torch.cos(rot).unsqueeze(0) + torch.cos(rot).unsqueeze(0), + torch.sin(rot).unsqueeze(0) ]) heatmaps.append(heatmap) @@ -432,7 +433,7 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], loss_dict[f'task{task_id}.loss_iou'] = self.calc_iou_loss( iou_preds=preds_dict[0]['iou'], batch_box_preds=batch_box_preds_for_iou.clone().detach(), - mask=mask.all(dim=-1), + mask=masks[task_id], ind=ind, gt_boxes=task_gt_bboxes[task_id]) @@ -440,7 +441,7 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], loss_dict[f'task{task_id}.loss_reg_iou'] = \ self.calc_iou_reg_loss( batch_box_preds=batch_box_preds_for_iou, - mask=mask.all(dim=-1), + mask=masks[task_id], ind=ind, gt_boxes=task_gt_bboxes[task_id]) @@ -517,6 +518,7 @@ def predict_by_feat(self, preds_dicts: Tuple[List[dict]], else: batch_dim = preds_dict[0]['dim'] + # It's different from CenterHead batch_rotc = preds_dict[0]['rot'][:, 0].unsqueeze(1) batch_rots = preds_dict[0]['rot'][:, 1].unsqueeze(1) batch_iou = (preds_dict[0]['iou'] + diff --git a/projects/DSVT/dsvt/dynamic_pillar_vfe.py b/projects/DSVT/dsvt/dynamic_pillar_vfe.py index 97c75aaf00..6d21a69713 100644 --- a/projects/DSVT/dsvt/dynamic_pillar_vfe.py +++ b/projects/DSVT/dsvt/dynamic_pillar_vfe.py @@ -1,4 +1,5 @@ # modified from https://github.com/Haiyang-W/DSVT +import numpy as np import torch import torch.nn as nn import torch_scatter @@ -76,6 +77,8 @@ def __init__(self, with_distance, use_absolute_xyz, use_norm, num_filters, self.voxel_x = voxel_size[0] self.voxel_y = voxel_size[1] self.voxel_z = voxel_size[2] + # TODO: remove it after 对齐精度 + point_cloud_range = np.array(point_cloud_range).astype(np.float32) self.x_offset = self.voxel_x / 2 + point_cloud_range[0] self.y_offset = self.voxel_y / 2 + point_cloud_range[1] self.z_offset = self.voxel_z / 2 + point_cloud_range[2] diff --git a/projects/DSVT/dsvt/transforms_3d.py b/projects/DSVT/dsvt/transforms_3d.py new file mode 100644 index 0000000000..03d2c38dd7 --- /dev/null +++ b/projects/DSVT/dsvt/transforms_3d.py @@ -0,0 +1,116 @@ +from typing import List + +import numpy as np +from mmcv import BaseTransform + +from mmdet3d.registry import TRANSFORMS + + +@TRANSFORMS.register_module() +class DSVTObjectRangeFilter(BaseTransform): + """Filter objects by the range. It differs from `ObjectRangeFilter` by + using `in_range_3d` instead of `in_range_bev`. + + Required Keys: + + - gt_bboxes_3d + + Modified Keys: + + - gt_bboxes_3d + + Args: + point_cloud_range (list[float]): Point cloud range. + """ + + def __init__(self, point_cloud_range: List[float]) -> None: + self.pcd_range = np.array(point_cloud_range, dtype=np.float32) + + def transform(self, input_dict: dict) -> dict: + """Transform function to filter objects by the range. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d' + keys are updated in the result dict. + """ + gt_bboxes_3d = input_dict['gt_bboxes_3d'] + gt_labels_3d = input_dict['gt_labels_3d'] + mask = gt_bboxes_3d.in_range_3d(self.pcd_range) + gt_bboxes_3d = gt_bboxes_3d[mask] + # mask is a torch tensor but gt_labels_3d is still numpy array + # using mask to index gt_labels_3d will cause bug when + # len(gt_labels_3d) == 1, where mask=1 will be interpreted + # as gt_labels_3d[1] and cause out of index error + gt_labels_3d = gt_labels_3d[mask.numpy().astype(bool)] + + # limit rad to [-pi, pi] + gt_bboxes_3d.limit_yaw(offset=0.5, period=2 * np.pi) + input_dict['gt_bboxes_3d'] = gt_bboxes_3d + input_dict['gt_labels_3d'] = gt_labels_3d + + return input_dict + + def __repr__(self) -> str: + """str: Return a string that describes the module.""" + repr_str = self.__class__.__name__ + repr_str += f'(point_cloud_range={self.pcd_range.tolist()})' + return repr_str + + +@TRANSFORMS.register_module() +class DSVTPointsRangeFilter(BaseTransform): + """Filter points by the range. It differs from `PointRangeFilter` by using + `in_range_bev` instead of `in_range_3d`. + + Required Keys: + + - points + - pts_instance_mask (optional) + + Modified Keys: + + - points + - pts_instance_mask (optional) + + Args: + point_cloud_range (list[float]): Point cloud range. + """ + + def __init__(self, point_cloud_range: List[float]) -> None: + self.pcd_range = np.array(point_cloud_range, dtype=np.float32) + + def transform(self, input_dict: dict) -> dict: + """Transform function to filter points by the range. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after filtering, 'points', 'pts_instance_mask' + and 'pts_semantic_mask' keys are updated in the result dict. + """ + points = input_dict['points'] + points_mask = points.in_range_bev(self.pcd_range[[0, 1, 3, 4]]) + clean_points = points[points_mask] + input_dict['points'] = clean_points + points_mask = points_mask.numpy() + + pts_instance_mask = input_dict.get('pts_instance_mask', None) + pts_semantic_mask = input_dict.get('pts_semantic_mask', None) + + if pts_instance_mask is not None: + input_dict['pts_instance_mask'] = pts_instance_mask[points_mask] + + if pts_semantic_mask is not None: + input_dict['pts_semantic_mask'] = pts_semantic_mask[points_mask] + + return input_dict + + def __repr__(self) -> str: + """str: Return a string that describes the module.""" + repr_str = self.__class__.__name__ + repr_str += f'(point_cloud_range={self.pcd_range.tolist()})' + return repr_str diff --git a/tools/train.py b/tools/train.py index b2ced54b05..6b9c3b0842 100644 --- a/tools/train.py +++ b/tools/train.py @@ -21,6 +21,12 @@ def parse_args(): action='store_true', default=False, help='enable automatic-mixed-precision training') + parser.add_argument( + '--sync_bn', + choices=['none', 'torch', 'mmcv'], + default='none', + help='convert all BatchNorm layers in the model to SyncBatchNorm ' + '(SyncBN) or mmcv.ops.sync_bn.SyncBatchNorm (MMSyncBN) layers.') parser.add_argument( '--auto-scale-lr', action='store_true', @@ -98,6 +104,10 @@ def main(): cfg.optim_wrapper.type = 'AmpOptimWrapper' cfg.optim_wrapper.loss_scale = 'dynamic' + # convert BatchNorm layers + if args.sync_bn != 'none': + cfg.sync_bn = args.sync_bn + # enable automatically scaling LR if args.auto_scale_lr: if 'auto_scale_lr' in cfg and \ From 2c9b33392718d3bca4c7c3ead4b54c4509d3b191 Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Fri, 15 Sep 2023 14:46:21 +0800 Subject: [PATCH 4/7] fix dis aug and model init --- ...ecfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py | 28 ++++---- projects/DSVT/dsvt/__init__.py | 3 +- projects/DSVT/dsvt/disable_aug_hook.py | 69 +++++++++++++++++++ projects/DSVT/dsvt/dsvt_head.py | 5 ++ 4 files changed, 91 insertions(+), 14 deletions(-) create mode 100644 projects/DSVT/dsvt/disable_aug_hook.py diff --git a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py index 8429a2dc6c..8220057a83 100644 --- a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py +++ b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py @@ -2,7 +2,7 @@ custom_imports = dict( imports=['projects.DSVT.dsvt'], allow_failed_imports=False) -# load_from = 'checkpoints/dsvt_init_mm3d.pth' +# load_from = 'checkpoints/dsvt_init.pth' voxel_size = [0.32, 0.32, 6] grid_size = [468, 468, 1] point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4.0] @@ -143,15 +143,8 @@ load_dim=6, use_dim=5, norm_intensity=True, + norm_elongation=True, backend_args=backend_args), - # Add this if using `MultiFrameDeformableDecoderRPN` - # dict( - # type='LoadPointsFromMultiSweeps', - # sweeps_num=9, - # load_dim=6, - # use_dim=[0, 1, 2, 3, 4], - # pad_empty_sweeps=True, - # remove_close=True), dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), dict(type='ObjectSample', db_sampler=db_sampler), dict( @@ -194,6 +187,7 @@ batch_size=1, num_workers=4, persistent_workers=True, + # sampler=dict(type='DefaultSampler', shuffle=False), sampler=dict(type='DefaultSampler', shuffle=True), dataset=dict( type=dataset_type, @@ -211,8 +205,8 @@ load_interval=5, backend_args=backend_args)) val_dataloader = dict( - batch_size=1, - num_workers=1, + batch_size=4, + num_workers=4, persistent_workers=True, drop_last=False, sampler=dict(type='DefaultSampler', shuffle=False), @@ -244,6 +238,7 @@ test_evaluator = val_evaluator vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')] +# vis_backends = [dict(type='LocalVisBackend')] visualizer = dict( type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer') lr = 1e-5 @@ -251,7 +246,7 @@ # max_norm=10 is better for SECOND optim_wrapper = dict( type='OptimWrapper', - optimizer=dict(type='Adam', lr=lr, weight_decay=0.05, betas=(0.9, 0.999)), + optimizer=dict(type='AdamW', lr=lr, weight_decay=0.05, betas=(0.9, 0.99)), clip_grad=dict(max_norm=10, norm_type=2)) # learning rate param_scheduler = [ @@ -306,4 +301,11 @@ default_hooks = dict( logger=dict(type='LoggerHook', interval=50), checkpoint=dict(type='CheckpointHook', interval=1)) -custom_hooks = [dict(type='DisableObjectSampleHook', disable_after_epoch=11)] +custom_hooks = [ + dict( + type='DisableAugHook', + disable_after_epoch=11, + disable_aug_list=[ + 'GlobalRotScaleTrans', 'RandomFlip3D', 'ObjectSample' + ]) +] diff --git a/projects/DSVT/dsvt/__init__.py b/projects/DSVT/dsvt/__init__.py index ed602d6a8a..58395863a4 100644 --- a/projects/DSVT/dsvt/__init__.py +++ b/projects/DSVT/dsvt/__init__.py @@ -1,3 +1,4 @@ +from .disable_aug_hook import DisableAugHook from .dsvt import DSVT from .dsvt_head import DSVTCenterHead from .dsvt_transformer import DSVTMiddleEncoder @@ -10,5 +11,5 @@ __all__ = [ 'DSVTCenterHead', 'DSVT', 'DSVTMiddleEncoder', 'DynamicPillarVFE3D', 'PointPillarsScatter3D', 'ResSECOND', 'DSVTBBoxCoder', - 'DSVTObjectRangeFilter', 'DSVTPointsRangeFilter' + 'DSVTObjectRangeFilter', 'DSVTPointsRangeFilter', 'DisableAugHook' ] diff --git a/projects/DSVT/dsvt/disable_aug_hook.py b/projects/DSVT/dsvt/disable_aug_hook.py new file mode 100644 index 0000000000..5a4dff5fb2 --- /dev/null +++ b/projects/DSVT/dsvt/disable_aug_hook.py @@ -0,0 +1,69 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import List + +from mmengine.dataset import BaseDataset +from mmengine.hooks import Hook +from mmengine.model import is_model_wrapper +from mmengine.runner import Runner + +from mmdet3d.registry import HOOKS + + +@HOOKS.register_module() +class DisableAugHook(Hook): + """The hook of disabling augmentations during training. + + Args: + disable_after_epoch (int): The number of epochs after which + the data augmentation will be closed in the training. + Defaults to 15. + disable_aug_list (list): the list of data augmentation will + be closed in the training. Defaults to []. + """ + + def __init__(self, + disable_after_epoch: int = 15, + disable_aug_list: List = []): + self.disable_after_epoch = disable_after_epoch + self.disable_aug_list = disable_aug_list + self._restart_dataloader = False + + def before_train_epoch(self, runner: Runner): + """Close augmentation. + + Args: + runner (Runner): The runner. + """ + epoch = runner.epoch + train_loader = runner.train_dataloader + model = runner.model + # TODO: refactor after mmengine using model wrapper + if is_model_wrapper(model): + model = model.module + if epoch == self.disable_after_epoch: + + dataset = runner.train_dataloader.dataset + # handle dataset wrapper + if not isinstance(dataset, BaseDataset): + dataset = dataset.dataset + new_transforms = [] + for transform in dataset.pipeline.transforms: # noqa: E501 + if transform.__class__.__name__ not in self.disable_aug_list: + new_transforms.append(transform) + else: + runner.logger.info( + f'Disable {transform.__class__.__name__}') + dataset.pipeline.transforms = new_transforms + # The dataset pipeline cannot be updated when persistent_workers + # is True, so we need to force the dataloader's multi-process + # restart. This is a very hacky approach. + if hasattr(train_loader, 'persistent_workers' + ) and train_loader.persistent_workers is True: + train_loader._DataLoader__initialized = False + train_loader._iterator = None + self._restart_dataloader = True + else: + # Once the restart is complete, we need to restore + # the initialization flag. + if self._restart_dataloader: + train_loader._DataLoader__initialized = True diff --git a/projects/DSVT/dsvt/dsvt_head.py b/projects/DSVT/dsvt/dsvt_head.py index e6ff075305..74bae83f2e 100644 --- a/projects/DSVT/dsvt/dsvt_head.py +++ b/projects/DSVT/dsvt/dsvt_head.py @@ -32,6 +32,11 @@ def __init__(self, self.loss_iou_reg = MODELS.build( loss_reg_iou) if loss_reg_iou is not None else None + def init_weights(self): + super().init_weights() + for task_head in self.task_heads: + task_head.init_weights() + def forward_single(self, x: Tensor) -> dict: """Forward function for CenterPoint. From 98247780baa74761c1b47dabe4cf79c57fe384c4 Mon Sep 17 00:00:00 2001 From: sunjihao1999 <578431509@qq.com> Date: Mon, 18 Sep 2023 13:28:08 +0800 Subject: [PATCH 5/7] train align --- mmdet3d/datasets/waymo_dataset.py | 2 +- mmdet3d/evaluation/metrics/waymo_metric.py | 4 ++-- .../models/dense_heads/centerpoint_head.py | 2 +- mmdet3d/models/necks/second_fpn.py | 18 +++++++++------ ...ecfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py | 6 +++-- projects/DSVT/dsvt/dsvt_head.py | 23 ++++++++++++++++--- projects/DSVT/dsvt/res_second.py | 13 +---------- projects/DSVT/dsvt/utils.py | 4 ++-- 8 files changed, 42 insertions(+), 30 deletions(-) diff --git a/mmdet3d/datasets/waymo_dataset.py b/mmdet3d/datasets/waymo_dataset.py index 58093a355d..f12462eaa8 100644 --- a/mmdet3d/datasets/waymo_dataset.py +++ b/mmdet3d/datasets/waymo_dataset.py @@ -210,7 +210,7 @@ def load_data_list(self) -> List[dict]: # load and parse data_infos. data_list = [] - for raw_data_info in mmengine.track_iter_progress(raw_data_list): + for raw_data_info in raw_data_list: # parse raw data information to target format data_info = self.parse_data_info(raw_data_info) if isinstance(data_info, dict): diff --git a/mmdet3d/evaluation/metrics/waymo_metric.py b/mmdet3d/evaluation/metrics/waymo_metric.py index fb8ac495e6..dc2f209b9a 100644 --- a/mmdet3d/evaluation/metrics/waymo_metric.py +++ b/mmdet3d/evaluation/metrics/waymo_metric.py @@ -36,7 +36,7 @@ class WaymoMetric(KittiMetric): prefix (str, optional): The prefix that will be added in the metric names to disambiguate homonymous metrics of different evaluators. If prefix is not provided in the argument, self.default_prefix will - be used instead. Defaults to None. + be used instead. Defaults to Waymo metric. format_only (bool): Format the output results without perform evaluation. It is useful when you want to format the result to a specific format and submit it to the test server. @@ -83,7 +83,7 @@ def __init__(self, metric: Union[str, List[str]] = 'mAP', pcd_limit_range: List[float] = [-85, -85, -5, 85, 85, 5], convert_kitti_format: bool = True, - prefix: Optional[str] = None, + prefix: Optional[str] = 'Waymo metric', format_only: bool = False, pklfile_prefix: Optional[str] = None, submission_prefix: Optional[str] = None, diff --git a/mmdet3d/models/dense_heads/centerpoint_head.py b/mmdet3d/models/dense_heads/centerpoint_head.py index 301db0eee0..c3fc187964 100644 --- a/mmdet3d/models/dense_heads/centerpoint_head.py +++ b/mmdet3d/models/dense_heads/centerpoint_head.py @@ -217,7 +217,7 @@ def forward(self, x): Returns: dict[str: torch.Tensor]: contains the following keys: - -reg (torch.Tensor): 2D regression value with the + -reg (torch.Tensor): 2D regression value with the shape of [B, 2, H, W]. -height (torch.Tensor): Height value with the shape of [B, 1, H, W]. diff --git a/mmdet3d/models/necks/second_fpn.py b/mmdet3d/models/necks/second_fpn.py index 90e57ec05c..e52a26ec4c 100644 --- a/mmdet3d/models/necks/second_fpn.py +++ b/mmdet3d/models/necks/second_fpn.py @@ -21,6 +21,10 @@ class SECONDFPN(BaseModule): upsample_cfg (dict): Config dict of upsample layers. conv_cfg (dict): Config dict of conv layers. use_conv_for_no_stride (bool): Whether to use conv when stride is 1. + init_cfg (dict or :obj:`ConfigDict` or list[dict or :obj:`ConfigDict`], + optional): Initialization config dict. Defaults to + [dict(type='Kaiming', layer='ConvTranspose2d'), + dict(type='Constant', layer='NaiveSyncBatchNorm2d', val=1.0)]. """ def __init__(self, @@ -31,7 +35,13 @@ def __init__(self, upsample_cfg=dict(type='deconv', bias=False), conv_cfg=dict(type='Conv2d', bias=False), use_conv_for_no_stride=False, - init_cfg=None): + init_cfg=[ + dict(type='Kaiming', layer='ConvTranspose2d'), + dict( + type='Constant', + layer='NaiveSyncBatchNorm2d', + val=1.0) + ]): # if for GroupNorm, # cfg is dict(type='GN', num_groups=num_groups, eps=1e-3, affine=True) super(SECONDFPN, self).__init__(init_cfg=init_cfg) @@ -64,12 +74,6 @@ def __init__(self, deblocks.append(deblock) self.deblocks = nn.ModuleList(deblocks) - if init_cfg is None: - self.init_cfg = [ - dict(type='Kaiming', layer='ConvTranspose2d'), - dict(type='Constant', layer='NaiveSyncBatchNorm2d', val=1.0) - ] - def forward(self, x): """Forward function. diff --git a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py index 8220057a83..9d0be465e8 100644 --- a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py +++ b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py @@ -133,6 +133,8 @@ coord_type='LIDAR', load_dim=6, use_dim=[0, 1, 2, 3, 4], + norm_intensity=True, + norm_elongation=True, backend_args=backend_args), backend_args=backend_args) @@ -192,7 +194,7 @@ dataset=dict( type=dataset_type, data_root=data_root, - ann_file='waymo_infos_train.pkl', + ann_file='waymo_wo_cam_ins_infos_train.pkl', data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'), pipeline=train_pipeline, modality=input_modality, @@ -214,7 +216,7 @@ type=dataset_type, data_root=data_root, data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'), - ann_file='waymo_infos_val.pkl', + ann_file='waymo_wo_cam_ins_infos_val.pkl', pipeline=test_pipeline, modality=input_modality, test_mode=True, diff --git a/projects/DSVT/dsvt/dsvt_head.py b/projects/DSVT/dsvt/dsvt_head.py index 74bae83f2e..808d3c3310 100644 --- a/projects/DSVT/dsvt/dsvt_head.py +++ b/projects/DSVT/dsvt/dsvt_head.py @@ -12,6 +12,10 @@ gaussian_radius) from mmdet3d.registry import MODELS from mmdet3d.structures import Det3DDataSample, xywhr2xyxyr +from mmengine.model import kaiming_init +import math +from torch.nn.init import constant_ +import torch.nn as nn @MODELS.register_module() @@ -33,9 +37,22 @@ def __init__(self, loss_reg_iou) if loss_reg_iou is not None else None def init_weights(self): - super().init_weights() - for task_head in self.task_heads: - task_head.init_weights() + kaiming_init( + self.shared_conv.conv, + a=math.sqrt(5), + mode='fan_in', + nonlinearity='leaky_relu', + distribution='uniform') + for head in self.task_heads[0].heads: + if head == 'heatmap': + constant_(self.task_heads[0].__getattr__(head)[-1].bias, + self.task_heads[0].init_bias) + else: + for m in self.task_heads[0].__getattr__(head).modules(): + if isinstance(m, nn.Conv2d): + kaiming_init( + m, mode='fan_in', nonlinearity='leaky_relu') + def forward_single(self, x: Tensor) -> dict: """Forward function for CenterPoint. diff --git a/projects/DSVT/dsvt/res_second.py b/projects/DSVT/dsvt/res_second.py index e1ddc1be6c..0755f895f9 100644 --- a/projects/DSVT/dsvt/res_second.py +++ b/projects/DSVT/dsvt/res_second.py @@ -78,8 +78,6 @@ class ResSECOND(BaseModule): out_channels (list[int]): Output channels for multi-scale feature maps. blocks_nums (list[int]): Number of blocks in each stage. layer_strides (list[int]): Strides of each stage. - norm_cfg (dict): Config dict of normalization layers. - conv_cfg (dict): Config dict of convolutional layers. """ def __init__(self, @@ -87,8 +85,7 @@ def __init__(self, out_channels: Sequence[int] = [128, 128, 256], blocks_nums: Sequence[int] = [1, 2, 2], layer_strides: Sequence[int] = [2, 2, 2], - init_cfg: OptMultiConfig = None, - pretrained: Optional[str] = None) -> None: + init_cfg: OptMultiConfig = None) -> None: super(ResSECOND, self).__init__(init_cfg=init_cfg) assert len(layer_strides) == len(blocks_nums) assert len(out_channels) == len(blocks_nums) @@ -108,14 +105,6 @@ def __init__(self, BasicResBlock(out_channels[i], out_channels[i])) blocks.append(nn.Sequential(*cur_layers)) self.blocks = nn.Sequential(*blocks) - assert not (init_cfg and pretrained), \ - 'init_cfg and pretrained cannot be setting at the same time' - if isinstance(pretrained, str): - warnings.warn('DeprecationWarning: pretrained is a deprecated, ' - 'please use "init_cfg" instead') - self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) - else: - self.init_cfg = dict(type='Kaiming', layer='Conv2d') def forward(self, x: Tensor) -> Tuple[Tensor, ...]: """Forward function. diff --git a/projects/DSVT/dsvt/utils.py b/projects/DSVT/dsvt/utils.py index 4d697d2dbb..7845b27f3f 100644 --- a/projects/DSVT/dsvt/utils.py +++ b/projects/DSVT/dsvt/utils.py @@ -9,7 +9,7 @@ from mmdet3d.models.task_modules import CenterPointBBoxCoder from mmdet3d.registry import MODELS, TASK_UTILS from .ops.ingroup_inds.ingroup_inds_op import ingroup_inds - +from mmengine import print_log get_inner_win_inds_cuda = ingroup_inds @@ -267,7 +267,7 @@ def decode(self, thresh_mask = final_scores > self.score_threshold if self.post_center_range is not None: - self.post_center_range = torch.tensor( + self.post_center_range = torch.as_tensor( self.post_center_range, device=heat.device) mask = (final_box_preds[..., :3] >= self.post_center_range[:3]).all(2) From 681423f9e48186a674c4bee7ef640824ec83774d Mon Sep 17 00:00:00 2001 From: sunjihao1999 <578431509@qq.com> Date: Mon, 18 Sep 2023 13:32:29 +0800 Subject: [PATCH 6/7] fix lint --- mmdet3d/datasets/waymo_dataset.py | 1 - mmdet3d/models/necks/second_fpn.py | 2 +- projects/DSVT/dsvt/dsvt_head.py | 9 ++++----- projects/DSVT/dsvt/res_second.py | 4 +--- projects/DSVT/dsvt/utils.py | 2 +- 5 files changed, 7 insertions(+), 11 deletions(-) diff --git a/mmdet3d/datasets/waymo_dataset.py b/mmdet3d/datasets/waymo_dataset.py index f12462eaa8..0d131164e9 100644 --- a/mmdet3d/datasets/waymo_dataset.py +++ b/mmdet3d/datasets/waymo_dataset.py @@ -2,7 +2,6 @@ import os.path as osp from typing import Callable, List, Union -import mmengine import numpy as np from mmengine import print_log from mmengine.fileio import load diff --git a/mmdet3d/models/necks/second_fpn.py b/mmdet3d/models/necks/second_fpn.py index e52a26ec4c..d4dc590c15 100644 --- a/mmdet3d/models/necks/second_fpn.py +++ b/mmdet3d/models/necks/second_fpn.py @@ -22,7 +22,7 @@ class SECONDFPN(BaseModule): conv_cfg (dict): Config dict of conv layers. use_conv_for_no_stride (bool): Whether to use conv when stride is 1. init_cfg (dict or :obj:`ConfigDict` or list[dict or :obj:`ConfigDict`], - optional): Initialization config dict. Defaults to + optional): Initialization config dict. Defaults to [dict(type='Kaiming', layer='ConvTranspose2d'), dict(type='Constant', layer='NaiveSyncBatchNorm2d', val=1.0)]. """ diff --git a/projects/DSVT/dsvt/dsvt_head.py b/projects/DSVT/dsvt/dsvt_head.py index 808d3c3310..650a585dfb 100644 --- a/projects/DSVT/dsvt/dsvt_head.py +++ b/projects/DSVT/dsvt/dsvt_head.py @@ -1,10 +1,14 @@ +import math from typing import Dict, List, Tuple import torch +import torch.nn as nn from mmcv.ops import boxes_iou3d from mmdet.models.utils import multi_apply +from mmengine.model import kaiming_init from mmengine.structures import InstanceData from torch import Tensor +from torch.nn.init import constant_ from mmdet3d.models import CenterHead from mmdet3d.models.layers import circle_nms, nms_bev @@ -12,10 +16,6 @@ gaussian_radius) from mmdet3d.registry import MODELS from mmdet3d.structures import Det3DDataSample, xywhr2xyxyr -from mmengine.model import kaiming_init -import math -from torch.nn.init import constant_ -import torch.nn as nn @MODELS.register_module() @@ -53,7 +53,6 @@ def init_weights(self): kaiming_init( m, mode='fan_in', nonlinearity='leaky_relu') - def forward_single(self, x: Tensor) -> dict: """Forward function for CenterPoint. diff --git a/projects/DSVT/dsvt/res_second.py b/projects/DSVT/dsvt/res_second.py index 0755f895f9..072c586181 100644 --- a/projects/DSVT/dsvt/res_second.py +++ b/projects/DSVT/dsvt/res_second.py @@ -1,7 +1,5 @@ # modified from https://github.com/Haiyang-W/DSVT - -import warnings -from typing import Optional, Sequence, Tuple +from typing import Sequence, Tuple from mmengine.model import BaseModule from torch import Tensor diff --git a/projects/DSVT/dsvt/utils.py b/projects/DSVT/dsvt/utils.py index 7845b27f3f..978f9154da 100644 --- a/projects/DSVT/dsvt/utils.py +++ b/projects/DSVT/dsvt/utils.py @@ -9,7 +9,7 @@ from mmdet3d.models.task_modules import CenterPointBBoxCoder from mmdet3d.registry import MODELS, TASK_UTILS from .ops.ingroup_inds.ingroup_inds_op import ingroup_inds -from mmengine import print_log + get_inner_win_inds_cuda = ingroup_inds From fd0825ab094e06e7c13769af8587430c6edb5354 Mon Sep 17 00:00:00 2001 From: sunjihao1999 <578431509@qq.com> Date: Mon, 18 Sep 2023 14:48:10 +0800 Subject: [PATCH 7/7] fix basepoints in_range_3d --- mmdet3d/structures/points/base_points.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mmdet3d/structures/points/base_points.py b/mmdet3d/structures/points/base_points.py index 188d20d270..4cb54ce895 100644 --- a/mmdet3d/structures/points/base_points.py +++ b/mmdet3d/structures/points/base_points.py @@ -247,10 +247,10 @@ def in_range_3d( """ in_range_flags = ((self.tensor[:, 0] > point_range[0]) & (self.tensor[:, 1] > point_range[1]) - # & (self.tensor[:, 2] > point_range[2]) + & (self.tensor[:, 2] > point_range[2]) & (self.tensor[:, 0] < point_range[3]) - & (self.tensor[:, 1] < point_range[4])) - # & (self.tensor[:, 2] < point_range[5])) + & (self.tensor[:, 1] < point_range[4]) + & (self.tensor[:, 2] < point_range[5])) return in_range_flags @property