From 24ce40a3a9bdb6ed6b89d0cacb1e81dcbd26ddc3 Mon Sep 17 00:00:00 2001 From: Nicola Loi Date: Thu, 6 Feb 2025 22:01:54 +0100 Subject: [PATCH] Cache info to optimize computations when ratio of selected indices is high. --- .../t/pipelines/registration/Feature.cpp | 2 - cpp/open3d/t/pipelines/kernel/Feature.cpp | 31 +-- cpp/open3d/t/pipelines/kernel/FeatureImpl.h | 22 +- .../t/pipelines/registration/Feature.cpp | 189 ++++++++++++++---- 4 files changed, 175 insertions(+), 69 deletions(-) diff --git a/cpp/benchmarks/t/pipelines/registration/Feature.cpp b/cpp/benchmarks/t/pipelines/registration/Feature.cpp index 8eac0814863..1e81d038873 100644 --- a/cpp/benchmarks/t/pipelines/registration/Feature.cpp +++ b/cpp/benchmarks/t/pipelines/registration/Feature.cpp @@ -5,8 +5,6 @@ // SPDX-License-Identifier: MIT // ---------------------------------------------------------------------------- -#include - #include "open3d/t/pipelines/registration/Feature.h" #include diff --git a/cpp/open3d/t/pipelines/kernel/Feature.cpp b/cpp/open3d/t/pipelines/kernel/Feature.cpp index 6601ef5432f..edbcdf1be7c 100644 --- a/cpp/open3d/t/pipelines/kernel/Feature.cpp +++ b/cpp/open3d/t/pipelines/kernel/Feature.cpp @@ -15,15 +15,15 @@ namespace t { namespace pipelines { namespace kernel { -void ComputeFPFHFeature(const core::Tensor &points, - const core::Tensor &normals, - const core::Tensor &indices, - const core::Tensor &distance2, - const core::Tensor &counts, - core::Tensor &fpfhs, - const utility::optional &mask, - const utility::optional - &map_batch_info_idx_to_point_idx) { +void ComputeFPFHFeature( + const core::Tensor &points, + const core::Tensor &normals, + const core::Tensor &indices, + const core::Tensor &distance2, + const core::Tensor &counts, + core::Tensor &fpfhs, + const utility::optional &mask, + const utility::optional &map_info_idx_to_point_idx) { if (mask.has_value()) { const int64_t size = mask.value().To(core::Int64).Sum({0}).Item(); @@ -32,21 +32,22 @@ void ComputeFPFHFeature(const core::Tensor &points, } else { core::AssertTensorShape(fpfhs, {points.GetLength(), 33}); } - if (map_batch_info_idx_to_point_idx.has_value()) { - core::AssertTensorShape(map_batch_info_idx_to_point_idx.value(), - {counts.GetLength()}); + if (map_info_idx_to_point_idx.has_value()) { + const bool is_radius_search = indices.GetShape().size() == 1; + core::AssertTensorShape( + map_info_idx_to_point_idx.value(), + {counts.GetLength() - (is_radius_search ? 1 : 0)}); } const core::Tensor points_d = points.Contiguous(); const core::Tensor normals_d = normals.Contiguous(); const core::Tensor counts_d = counts.To(core::Int32); if (points_d.IsCPU()) { ComputeFPFHFeatureCPU(points_d, normals_d, indices, distance2, counts_d, - fpfhs, mask, map_batch_info_idx_to_point_idx); + fpfhs, mask, map_info_idx_to_point_idx); } else { core::CUDAScopedDevice scoped_device(points.GetDevice()); CUDA_CALL(ComputeFPFHFeatureCUDA, points_d, normals_d, indices, - distance2, counts_d, fpfhs, mask, - map_batch_info_idx_to_point_idx); + distance2, counts_d, fpfhs, mask, map_info_idx_to_point_idx); } utility::LogDebug( "[ComputeFPFHFeature] Computed {:d} features from " diff --git a/cpp/open3d/t/pipelines/kernel/FeatureImpl.h b/cpp/open3d/t/pipelines/kernel/FeatureImpl.h index 657f9878e14..c2492eedbfa 100644 --- a/cpp/open3d/t/pipelines/kernel/FeatureImpl.h +++ b/cpp/open3d/t/pipelines/kernel/FeatureImpl.h @@ -116,19 +116,17 @@ void ComputeFPFHFeatureCPU const core::Tensor &counts, core::Tensor &fpfhs, const utility::optional &mask, - const utility::optional - &map_batch_info_idx_to_point_idx) { + const utility::optional &map_info_idx_to_point_idx) { const core::Dtype dtype = points.GetDtype(); const core::Device device = points.GetDevice(); const int64_t n_points = points.GetLength(); const bool filter_fpfh = - mask.has_value() && map_batch_info_idx_to_point_idx.has_value(); - if (mask.has_value() ^ map_batch_info_idx_to_point_idx.has_value()) { + mask.has_value() && map_info_idx_to_point_idx.has_value(); + if (mask.has_value() ^ map_info_idx_to_point_idx.has_value()) { utility::LogError( - "Parameters mask and map_batch_info_idx_to_point_idx must be " - "both " - "provided or both not provided."); + "Parameters mask and map_info_idx_to_point_idx must " + "either be both provided or both not provided."); } if (filter_fpfh) { if (mask.value().GetShape()[0] != n_points) { @@ -137,19 +135,19 @@ void ComputeFPFHFeatureCPU "be equal to the number of points {:d}.", (int)mask.value().GetShape()[0], n_points); } - if (map_batch_info_idx_to_point_idx.value().GetShape()[0] != - counts.GetShape()[0]) { + if (map_info_idx_to_point_idx.value().GetShape()[0] != + counts.GetShape()[0] - (indices.GetShape().size() == 1 ? 1 : 0)) { utility::LogError( - "Parameter map_batch_info_idx_to_point_idx was provided, " + "Parameter map_info_idx_to_point_idx was provided, " "but its size" "{:d} should be equal to the size of counts {:d}.", - (int)map_batch_info_idx_to_point_idx.value().GetShape()[0], + (int)map_info_idx_to_point_idx.value().GetShape()[0], (int)counts.GetShape()[0]); } } core::Tensor map_spfh_info_idx_to_point_idx = - map_batch_info_idx_to_point_idx.value_or( + map_info_idx_to_point_idx.value_or( core::Tensor::Empty({0}, core::Int64, device)); const core::Tensor map_fpfh_idx_to_point_idx = diff --git a/cpp/open3d/t/pipelines/registration/Feature.cpp b/cpp/open3d/t/pipelines/registration/Feature.cpp index 5db1170abe9..98932650084 100644 --- a/cpp/open3d/t/pipelines/registration/Feature.cpp +++ b/cpp/open3d/t/pipelines/registration/Feature.cpp @@ -7,6 +7,7 @@ #include "open3d/t/pipelines/registration/Feature.h" +#include "open3d/core/ParallelFor.h" #include "open3d/core/nns/NearestNeighborSearch.h" #include "open3d/t/geometry/PointCloud.h" #include "open3d/t/pipelines/kernel/Feature.h" @@ -43,58 +44,140 @@ core::Tensor ComputeFPFHFeature( core::Int32); bool tree_set = false; + const bool filter_fpfh = indices.has_value(); + // If we are computing a subset of the FPFH feature, + // cache some information to speed up the computation + // if the ratio of the indices to the total number of points is high. + const int cache_info_indices_ratio_thresh = 0.1; + bool cache_fpfh_info = true; + core::Tensor mask_fpfh_points; - core::Tensor mask_required_points; - if (indices.has_value()) { + core::Tensor indices_fpfh_points; + core::Tensor map_point_idx_to_required_point_idx; + core::Tensor map_required_point_idx_to_point_idx; + core::Tensor save_p_indices, save_p_distance2, save_p_counts; + core::Tensor mask_spfh_points; + + // If we are computing a subset of the FPFH feature, we need to find + // the subset of points (neighbors) required to compute the FPFH features. + if (filter_fpfh) { + if (indices.value().GetLength() == 0) { + return core::Tensor::Zeros({0, 33}, dtype, device); + } mask_fpfh_points = core::Tensor::Zeros({num_points}, core::Bool, device); - mask_required_points = - core::Tensor::Zeros({num_points}, core::Bool, device); - core::Tensor indices_tmp, distance2_tmp, counts_tmp; mask_fpfh_points.IndexSet({indices.value()}, core::Tensor::Ones({1}, core::Bool, device)); const core::Tensor query_point_positions = - input.GetPointPositions().IndexGet({indices.value()}); + input.GetPointPositions().IndexGet({mask_fpfh_points}); + core::Tensor p_indices, p_distance2, p_counts; if (radius.has_value() && max_nn.has_value()) { tree_set = tree.HybridIndex(radius.value()); if (!tree_set) { utility::LogError("Building HybridIndex failed."); } - std::tie(indices_tmp, distance2_tmp, counts_tmp) = - tree.HybridSearch(query_point_positions, radius.value(), - max_nn.value()); + std::tie(p_indices, p_distance2, p_counts) = tree.HybridSearch( + query_point_positions, radius.value(), max_nn.value()); } else if (!radius.has_value() && max_nn.has_value()) { tree_set = tree.KnnIndex(); if (!tree_set) { utility::LogError("Building KnnIndex failed."); } - std::tie(indices_tmp, distance2_tmp) = + std::tie(p_indices, p_distance2) = tree.KnnSearch(query_point_positions, max_nn.value()); + + // Make counts full with min(max_nn, num_points). + const int fill_value = + max_nn.value() > num_points ? num_points : max_nn.value(); + p_counts = core::Tensor::Full({query_point_positions.GetLength()}, + fill_value, core::Int32, device); } else if (radius.has_value() && !max_nn.has_value()) { tree_set = tree.FixedRadiusIndex(radius.value()); if (!tree_set) { utility::LogError("Building RadiusIndex failed."); } - std::tie(indices_tmp, distance2_tmp, counts_tmp) = - tree.FixedRadiusSearch(query_point_positions, - radius.value()); + std::tie(p_indices, p_distance2, p_counts) = tree.FixedRadiusSearch( + query_point_positions, radius.value()); } else { utility::LogError("Both max_nn and radius are none."); } - indices_tmp = indices_tmp.To(core::Int64).View({-1}); + core::Tensor mask_required_points = + core::Tensor::Zeros({num_points}, core::Bool, device); mask_required_points.IndexSet( - {indices_tmp}, core::Tensor::Ones({1}, core::Bool, device)); + {p_indices.To(core::Int64).View({-1})}, + core::Tensor::Ones({1}, core::Bool, device)); + map_required_point_idx_to_point_idx = + mask_required_points.NonZero().GetItem( + {core::TensorKey::Index(0)}); + indices_fpfh_points = + mask_fpfh_points.NonZero().GetItem({core::TensorKey::Index(0)}); - } else { - mask_fpfh_points = core::Tensor::Zeros({0}, core::Bool, device); - mask_required_points = core::Tensor::Zeros({0}, core::Bool, device); + const bool is_radius_search = p_indices.GetShape().size() == 1; + + // Cache the info if the ratio of the indices to the total number of + // points is high and we are not doing a radius search. Radius search + // requires a different pipeline since tensor output p_counts is a + // prefix sum. + cache_fpfh_info = + !is_radius_search && + (static_cast(indices_fpfh_points.GetLength()) >= + cache_info_indices_ratio_thresh * + static_cast(num_points)); + + if (cache_fpfh_info) { + map_point_idx_to_required_point_idx = + core::Tensor::Full({num_points}, -1, core::Int32, device); + map_point_idx_to_required_point_idx.IndexSet( + {map_required_point_idx_to_point_idx}, + core::Tensor::Arange( + 0, map_required_point_idx_to_point_idx.GetLength(), + 1, core::Int32, device)); + + core::SizeVector save_p_indices_shape = p_indices.GetShape(); + save_p_indices_shape[0] = + map_required_point_idx_to_point_idx.GetLength(); + save_p_indices = core::Tensor::Zeros(save_p_indices_shape, + core::Int32, device); + save_p_distance2 = core::Tensor::Zeros(save_p_indices.GetShape(), + dtype, device); + save_p_counts = core::Tensor::Zeros( + {map_required_point_idx_to_point_idx.GetLength() + + (is_radius_search ? 1 : 0)}, + core::Int32, device); + + core::Tensor map_fpfh_point_idx_to_required_point_idx = + map_point_idx_to_required_point_idx + .IndexGet({indices_fpfh_points}) + .To(core::Int64); + + save_p_indices.IndexSet({map_fpfh_point_idx_to_required_point_idx}, + p_indices); + save_p_distance2.IndexSet( + {map_fpfh_point_idx_to_required_point_idx}, p_distance2); + save_p_counts.IndexSet({map_fpfh_point_idx_to_required_point_idx}, + p_counts); + + // If we are filtering FPFH features, we have already computed some + // info about the FPFH points' neighbors. Now we just need to + // compute the info for the remaining required points, so skip the + // computation for the already computed info. + mask_spfh_points = + core::Tensor::Zeros({num_points}, core::Bool, device); + mask_spfh_points.IndexSet( + {map_required_point_idx_to_point_idx}, + core::Tensor::Ones({1}, core::Bool, device)); + mask_spfh_points.IndexSet( + {indices_fpfh_points}, + core::Tensor::Zeros({1}, core::Bool, device)); + } else { + mask_spfh_points = mask_required_points; + } } const core::Tensor query_point_positions = - mask_required_points.GetShape()[0] > 0 - ? input.GetPointPositions().IndexGet({mask_required_points}) - : input.GetPointPositions(); + filter_fpfh ? input.GetPointPositions().IndexGet({mask_spfh_points}) + : input.GetPointPositions(); // Compute nearest neighbors and squared distances. core::Tensor p_indices, p_distance2, p_counts; @@ -119,14 +202,25 @@ core::Tensor ComputeFPFHFeature( utility::LogError("Building KnnIndex failed."); } } - std::tie(p_indices, p_distance2) = - tree.KnnSearch(query_point_positions, max_nn.value()); - - // Make counts full with min(max_nn, num_points). - const int fill_value = - max_nn.value() > num_points ? num_points : max_nn.value(); - p_counts = core::Tensor::Full({query_point_positions.GetLength()}, - fill_value, core::Int32, device); + + // tree.KnnSearch complains if the query point cloud is empty. + if (query_point_positions.GetLength() > 0) { + std::tie(p_indices, p_distance2) = + tree.KnnSearch(query_point_positions, max_nn.value()); + + const int fill_value = + max_nn.value() > num_points ? num_points : max_nn.value(); + + p_counts = core::Tensor::Full({query_point_positions.GetLength()}, + fill_value, core::Int32, device); + } else { + p_indices = core::Tensor::Zeros({0, max_nn.value()}, core::Int32, + device); + p_distance2 = + core::Tensor::Zeros({0, max_nn.value()}, dtype, device); + p_counts = core::Tensor::Zeros({0}, core::Int32, device); + } + utility::LogDebug( "Use KNNSearch [max_nn: {}] for computing FPFH feature.", max_nn.value()); @@ -147,18 +241,33 @@ core::Tensor ComputeFPFHFeature( } core::Tensor fpfh; - if (indices.has_value()) { - const auto mask_fpfh_points_indices = - mask_fpfh_points.NonZero().GetItem({core::TensorKey::Index(0)}); - const auto map_batch_info_idx_to_point_idx = - mask_required_points.NonZero().GetItem( - {core::TensorKey::Index(0)}); - fpfh = core::Tensor::Zeros({mask_fpfh_points_indices.GetLength(), 33}, - dtype, device); + if (filter_fpfh) { + const int64_t size = indices_fpfh_points.GetLength(); + fpfh = core::Tensor::Zeros({size, 33}, dtype, device); + core::Tensor final_p_indices, final_p_distance2, final_p_counts; + if (cache_fpfh_info) { + core::Tensor map_spfh_idx_to_required_point_idx = + map_point_idx_to_required_point_idx + .IndexGet({mask_spfh_points}) + .To(core::Int64); + save_p_indices.IndexSet({map_spfh_idx_to_required_point_idx}, + p_indices); + save_p_distance2.IndexSet({map_spfh_idx_to_required_point_idx}, + p_distance2); + save_p_counts.IndexSet({map_spfh_idx_to_required_point_idx}, + p_counts); + final_p_indices = save_p_indices; + final_p_distance2 = save_p_distance2; + final_p_counts = save_p_counts; + } else { + final_p_indices = p_indices; + final_p_distance2 = p_distance2; + final_p_counts = p_counts; + } pipelines::kernel::ComputeFPFHFeature( - input.GetPointPositions(), input.GetPointNormals(), p_indices, - p_distance2, p_counts, fpfh, mask_fpfh_points, - map_batch_info_idx_to_point_idx); + input.GetPointPositions(), input.GetPointNormals(), + final_p_indices, final_p_distance2, final_p_counts, fpfh, + mask_fpfh_points, map_required_point_idx_to_point_idx); } else { const int64_t size = input.GetPointPositions().GetLength(); fpfh = core::Tensor::Zeros({size, 33}, dtype, device);