Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1.x branch fix hccl compile #3237

Merged
merged 2 commits into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion mmcv/ops/csrc/pytorch/npu/bbox_overlaps_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ void bbox_overlaps_npu(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
bboxesFP32 = bboxesFP32.to(at::kFloat);
gtboxesFP32 = gtboxesFP32.to(at::kFloat);
}
c10::SmallVector<int64_t, SIZE> iousSize = {gtboxesFP32.size(0),
c10::SmallVector<int64_t, 8> iousSize = {gtboxesFP32.size(0),
bboxesFP32.size(0)};
if (aligned) {
iousSize = {gtboxesFP32.size(0), 1};
Expand Down
2 changes: 1 addition & 1 deletion mmcv/ops/csrc/pytorch/npu/fused_bias_leakyrelu_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Tensor fused_bias_leakyrelu_npu(const Tensor &input, const Tensor &bias,
if (grad == 0) {
auto input_size = input.sizes();
int input_length = input_size.size();
c10::SmallVector<int64_t, SIZE> input_size_tmp;
c10::SmallVector<int64_t, 8> input_size_tmp;
for (uint64_t i = 0; i < input_size.size(); i++) {
input_size_tmp.emplace_back(input_size[i]);
}
Expand Down
2 changes: 1 addition & 1 deletion mmcv/ops/csrc/pytorch/npu/nms_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Tensor nms_npu(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
at::empty({}, boxes.options().dtype(at::kFloat)).fill_(0);
at::Tensor max_outputsize_y =
at::empty({}, boxes.options().dtype(at::kInt)).fill_(boxes.size(0));
c10::SmallVector<int64_t, SIZE> outputsize = {boxes.size(0)};
c10::SmallVector<int64_t, 8> outputsize = {boxes.size(0)};
at::Tensor output =
at::empty(outputsize, boxes.options().dtype(at::kInt)).fill_(-1);
OpCommand cmd;
Expand Down
2 changes: 1 addition & 1 deletion mmcv/ops/csrc/pytorch/npu/nms_rotated_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Tensor nms_rotated_npu(const Tensor dets, const Tensor scores,
detsCast = detsCast.to(at::kFloat);
scoresCast = scoresCast.to(at::kFloat);
}
c10::SmallVector<int64_t, SIZE> selectedIndexSize = {dets.size(0)};
c10::SmallVector<int64_t, 8> selectedIndexSize = {dets.size(0)};
at::Tensor selectedBox = at::empty_like(dets);
at::Tensor selectedIndex =
at::empty(selectedIndexSize, dets.options().dtype(at::kInt));
Expand Down
2 changes: 1 addition & 1 deletion mmcv/ops/csrc/pytorch/npu/roi_align_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ void roi_align_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax_y,
roi_end_mode = 0;
}
auto shape = grad_input.sizes();
c10::SmallVector<int64_t, SIZE> xdiff_shape;
c10::SmallVector<int64_t, 8> xdiff_shape;
for (uint64_t i = 0; i < shape.size(); i++) {
xdiff_shape.emplace_back(shape[i]);
}
Expand Down
4 changes: 2 additions & 2 deletions mmcv/ops/csrc/pytorch/npu/roipoint_pool3d_forward.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ void roipoint_pool3d_forward_impl_npu(int batch_size, int pts_num,
Tensor pooled_empty_flag) {
auto points_trans = xyz.transpose(1, 2).contiguous();
auto point_features_trans = pts_feature.transpose(1, 2).contiguous();
c10::SmallVector<int64_t, SIZE> features_trans_size = {
c10::SmallVector<int64_t, 8> features_trans_size = {
xyz.size(0), boxes3d.size(1), xyz.size(2) + pts_feature.size(2),
sampled_pts_num};
at::Tensor pooled_features_trans =
at::empty(features_trans_size, xyz.options());
c10::SmallVector<int64_t, SIZE> empty_flag_size = {boxes3d.size(0),
c10::SmallVector<int64_t, 8> empty_flag_size = {boxes3d.size(0),
boxes3d.size(1)};
EXEC_NPU_CMD(aclnnRoipointPool3dForward, points_trans, point_features_trans,
boxes3d, sampled_pts_num, pooled_features_trans,
Expand Down
5 changes: 5 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,11 @@ def get_mluops_version(file_path):
'torch_npu').submodule_search_locations[0] +
'/include/third_party/acl/inc'
]
extra_compile_args['cxx'] += [
'-I' + importlib.util.find_spec(
'torch_npu').submodule_search_locations[0] +
'/include/third_party/hccl/inc'
]
define_macros += [('MMCV_WITH_NPU', None)]
extension = NpuExtension
if parse_version(torch.__version__) < parse_version('2.1.0'):
Expand Down
Loading