Commit
Cache info to optimize computations when ratio of selected indices is high.
nicolaloi committed Feb 6, 2025
1 parent 3fa7f80 commit 24ce40a
Showing 4 changed files with 175 additions and 69 deletions.
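For orientation, the optimization works as follows: when FPFH features are requested only for a subset of points, the neighbor-search results obtained for that subset are cached and reused for the subsequent search over the required points, but only when the subset is a large enough fraction of the cloud and the search is not a radius search (whose counts tensor is a prefix sum). A minimal sketch of that decision, using a hypothetical helper name that does not exist in Open3D:

#include <cstdint>

// Hedged sketch of the caching rule added in registration/Feature.cpp:
// reuse cached neighbor info only for KNN/hybrid search and only when the
// selected indices cover at least `ratio_threshold` of the point cloud.
bool ShouldCacheFpfhInfo(std::int64_t num_selected_points,
                         std::int64_t num_total_points,
                         bool is_radius_search,
                         double ratio_threshold = 0.1) {
    if (is_radius_search) {
        return false;  // radius search returns counts as a prefix sum
    }
    return static_cast<double>(num_selected_points) >=
           ratio_threshold * static_cast<double>(num_total_points);
}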
2 changes: 0 additions & 2 deletions cpp/benchmarks/t/pipelines/registration/Feature.cpp
@@ -5,8 +5,6 @@
// SPDX-License-Identifier: MIT
// ----------------------------------------------------------------------------

#include <map>

#include "open3d/t/pipelines/registration/Feature.h"

#include <benchmark/benchmark.h>
31 changes: 16 additions & 15 deletions cpp/open3d/t/pipelines/kernel/Feature.cpp
@@ -15,15 +15,15 @@ namespace t {
namespace pipelines {
namespace kernel {

void ComputeFPFHFeature(const core::Tensor &points,
const core::Tensor &normals,
const core::Tensor &indices,
const core::Tensor &distance2,
const core::Tensor &counts,
core::Tensor &fpfhs,
const utility::optional<core::Tensor> &mask,
const utility::optional<core::Tensor>
&map_batch_info_idx_to_point_idx) {
void ComputeFPFHFeature(
const core::Tensor &points,
const core::Tensor &normals,
const core::Tensor &indices,
const core::Tensor &distance2,
const core::Tensor &counts,
core::Tensor &fpfhs,
const utility::optional<core::Tensor> &mask,
const utility::optional<core::Tensor> &map_info_idx_to_point_idx) {
if (mask.has_value()) {
const int64_t size =
mask.value().To(core::Int64).Sum({0}).Item<int64_t>();
@@ -32,21 +32,22 @@ void ComputeFPFHFeature(const core::Tensor &points,
} else {
core::AssertTensorShape(fpfhs, {points.GetLength(), 33});
}
if (map_batch_info_idx_to_point_idx.has_value()) {
core::AssertTensorShape(map_batch_info_idx_to_point_idx.value(),
{counts.GetLength()});
if (map_info_idx_to_point_idx.has_value()) {
const bool is_radius_search = indices.GetShape().size() == 1;
core::AssertTensorShape(
map_info_idx_to_point_idx.value(),
{counts.GetLength() - (is_radius_search ? 1 : 0)});
}
const core::Tensor points_d = points.Contiguous();
const core::Tensor normals_d = normals.Contiguous();
const core::Tensor counts_d = counts.To(core::Int32);
if (points_d.IsCPU()) {
ComputeFPFHFeatureCPU(points_d, normals_d, indices, distance2, counts_d,
fpfhs, mask, map_batch_info_idx_to_point_idx);
fpfhs, mask, map_info_idx_to_point_idx);
} else {
core::CUDAScopedDevice scoped_device(points.GetDevice());
CUDA_CALL(ComputeFPFHFeatureCUDA, points_d, normals_d, indices,
distance2, counts_d, fpfhs, mask,
map_batch_info_idx_to_point_idx);
distance2, counts_d, fpfhs, mask, map_info_idx_to_point_idx);
}
utility::LogDebug(
"[ComputeFPFHFeature] Computed {:d} features from "
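Note on the shape assertion above: fixed-radius search returns counts as a prefix sum with one extra leading element, so the per-query map is expected to have one fewer entry than counts; hybrid and KNN searches return one count per query. A tiny illustrative helper (hypothetical, not part of the kernel API) capturing that expected length:

#include <cstdint>

// Expected length of map_info_idx_to_point_idx given the counts tensor.
std::int64_t ExpectedMapLength(std::int64_t counts_length,
                               bool is_radius_search) {
    // For radius search, counts is a prefix sum of length num_queries + 1.
    return counts_length - (is_radius_search ? 1 : 0);
}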
22 changes: 10 additions & 12 deletions cpp/open3d/t/pipelines/kernel/FeatureImpl.h
@@ -116,19 +116,17 @@ void ComputeFPFHFeatureCPU
const core::Tensor &counts,
core::Tensor &fpfhs,
const utility::optional<core::Tensor> &mask,
const utility::optional<core::Tensor>
&map_batch_info_idx_to_point_idx) {
const utility::optional<core::Tensor> &map_info_idx_to_point_idx) {
const core::Dtype dtype = points.GetDtype();
const core::Device device = points.GetDevice();
const int64_t n_points = points.GetLength();

const bool filter_fpfh =
mask.has_value() && map_batch_info_idx_to_point_idx.has_value();
if (mask.has_value() ^ map_batch_info_idx_to_point_idx.has_value()) {
mask.has_value() && map_info_idx_to_point_idx.has_value();
if (mask.has_value() ^ map_info_idx_to_point_idx.has_value()) {
utility::LogError(
"Parameters mask and map_batch_info_idx_to_point_idx must be "
"both "
"provided or both not provided.");
"Parameters mask and map_info_idx_to_point_idx must "
"either be both provided or both not provided.");
}
if (filter_fpfh) {
if (mask.value().GetShape()[0] != n_points) {
@@ -137,19 +135,19 @@ void ComputeFPFHFeatureCPU
"be equal to the number of points {:d}.",
(int)mask.value().GetShape()[0], n_points);
}
if (map_batch_info_idx_to_point_idx.value().GetShape()[0] !=
counts.GetShape()[0]) {
if (map_info_idx_to_point_idx.value().GetShape()[0] !=
counts.GetShape()[0] - (indices.GetShape().size() == 1 ? 1 : 0)) {
utility::LogError(
"Parameter map_batch_info_idx_to_point_idx was provided, "
"Parameter map_info_idx_to_point_idx was provided, "
"but its size"
"{:d} should be equal to the size of counts {:d}.",
(int)map_batch_info_idx_to_point_idx.value().GetShape()[0],
(int)map_info_idx_to_point_idx.value().GetShape()[0],
(int)counts.GetShape()[0]);
}
}

core::Tensor map_spfh_info_idx_to_point_idx =
map_batch_info_idx_to_point_idx.value_or(
map_info_idx_to_point_idx.value_or(
core::Tensor::Empty({0}, core::Int64, device));

const core::Tensor map_fpfh_idx_to_point_idx =
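The validation above requires the mask and the index map to be supplied together or not at all, and falls back to an empty tensor when no map is given. The same invariant with plain std types (an illustrative sketch, not the utility::optional-based Open3D code):

#include <cstdint>
#include <optional>
#include <stdexcept>
#include <vector>

std::vector<std::int64_t> ResolveMap(
        const std::optional<std::vector<bool>> &mask,
        const std::optional<std::vector<std::int64_t>> &map_info_idx_to_point_idx) {
    // Both parameters must be provided together, or both omitted.
    if (mask.has_value() != map_info_idx_to_point_idx.has_value()) {
        throw std::invalid_argument(
                "mask and map_info_idx_to_point_idx must either be both "
                "provided or both not provided.");
    }
    // value_or-style fallback: an empty map means "compute for all points".
    return map_info_idx_to_point_idx.value_or(std::vector<std::int64_t>{});
}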
189 changes: 149 additions & 40 deletions cpp/open3d/t/pipelines/registration/Feature.cpp
@@ -7,6 +7,7 @@

#include "open3d/t/pipelines/registration/Feature.h"

#include "open3d/core/ParallelFor.h"
#include "open3d/core/nns/NearestNeighborSearch.h"
#include "open3d/t/geometry/PointCloud.h"
#include "open3d/t/pipelines/kernel/Feature.h"
@@ -43,58 +44,140 @@ core::Tensor ComputeFPFHFeature(
core::Int32);
bool tree_set = false;

const bool filter_fpfh = indices.has_value();
// If we are computing a subset of the FPFH feature,
// cache some information to speed up the computation
// if the ratio of the indices to the total number of points is high.
const double cache_info_indices_ratio_thresh = 0.1;
bool cache_fpfh_info = true;

core::Tensor mask_fpfh_points;
core::Tensor mask_required_points;
if (indices.has_value()) {
core::Tensor indices_fpfh_points;
core::Tensor map_point_idx_to_required_point_idx;
core::Tensor map_required_point_idx_to_point_idx;
core::Tensor save_p_indices, save_p_distance2, save_p_counts;
core::Tensor mask_spfh_points;

// If we are computing a subset of the FPFH feature, we need to find
// the subset of points (neighbors) required to compute the FPFH features.
if (filter_fpfh) {
if (indices.value().GetLength() == 0) {
return core::Tensor::Zeros({0, 33}, dtype, device);
}
mask_fpfh_points =
core::Tensor::Zeros({num_points}, core::Bool, device);
mask_required_points =
core::Tensor::Zeros({num_points}, core::Bool, device);
core::Tensor indices_tmp, distance2_tmp, counts_tmp;
mask_fpfh_points.IndexSet({indices.value()},
core::Tensor::Ones({1}, core::Bool, device));
const core::Tensor query_point_positions =
input.GetPointPositions().IndexGet({indices.value()});
input.GetPointPositions().IndexGet({mask_fpfh_points});
core::Tensor p_indices, p_distance2, p_counts;
if (radius.has_value() && max_nn.has_value()) {
tree_set = tree.HybridIndex(radius.value());
if (!tree_set) {
utility::LogError("Building HybridIndex failed.");
}
std::tie(indices_tmp, distance2_tmp, counts_tmp) =
tree.HybridSearch(query_point_positions, radius.value(),
max_nn.value());
std::tie(p_indices, p_distance2, p_counts) = tree.HybridSearch(
query_point_positions, radius.value(), max_nn.value());
} else if (!radius.has_value() && max_nn.has_value()) {
tree_set = tree.KnnIndex();
if (!tree_set) {
utility::LogError("Building KnnIndex failed.");
}
std::tie(indices_tmp, distance2_tmp) =
std::tie(p_indices, p_distance2) =
tree.KnnSearch(query_point_positions, max_nn.value());

// Make counts full with min(max_nn, num_points).
const int fill_value =
max_nn.value() > num_points ? num_points : max_nn.value();
p_counts = core::Tensor::Full({query_point_positions.GetLength()},
fill_value, core::Int32, device);
} else if (radius.has_value() && !max_nn.has_value()) {
tree_set = tree.FixedRadiusIndex(radius.value());
if (!tree_set) {
utility::LogError("Building RadiusIndex failed.");
}
std::tie(indices_tmp, distance2_tmp, counts_tmp) =
tree.FixedRadiusSearch(query_point_positions,
radius.value());
std::tie(p_indices, p_distance2, p_counts) = tree.FixedRadiusSearch(
query_point_positions, radius.value());
} else {
utility::LogError("Both max_nn and radius are none.");
}

indices_tmp = indices_tmp.To(core::Int64).View({-1});
core::Tensor mask_required_points =
core::Tensor::Zeros({num_points}, core::Bool, device);
mask_required_points.IndexSet(
{indices_tmp}, core::Tensor::Ones({1}, core::Bool, device));
{p_indices.To(core::Int64).View({-1})},
core::Tensor::Ones({1}, core::Bool, device));
map_required_point_idx_to_point_idx =
mask_required_points.NonZero().GetItem(
{core::TensorKey::Index(0)});
indices_fpfh_points =
mask_fpfh_points.NonZero().GetItem({core::TensorKey::Index(0)});

} else {
mask_fpfh_points = core::Tensor::Zeros({0}, core::Bool, device);
mask_required_points = core::Tensor::Zeros({0}, core::Bool, device);
const bool is_radius_search = p_indices.GetShape().size() == 1;

// Cache the info if the ratio of the indices to the total number of
// points is high and we are not doing a radius search. Radius search
// requires a different pipeline since tensor output p_counts is a
// prefix sum.
cache_fpfh_info =
!is_radius_search &&
(static_cast<double>(indices_fpfh_points.GetLength()) >=
cache_info_indices_ratio_thresh *
static_cast<double>(num_points));

if (cache_fpfh_info) {
map_point_idx_to_required_point_idx =
core::Tensor::Full({num_points}, -1, core::Int32, device);
map_point_idx_to_required_point_idx.IndexSet(
{map_required_point_idx_to_point_idx},
core::Tensor::Arange(
0, map_required_point_idx_to_point_idx.GetLength(),
1, core::Int32, device));

core::SizeVector save_p_indices_shape = p_indices.GetShape();
save_p_indices_shape[0] =
map_required_point_idx_to_point_idx.GetLength();
save_p_indices = core::Tensor::Zeros(save_p_indices_shape,
core::Int32, device);
save_p_distance2 = core::Tensor::Zeros(save_p_indices.GetShape(),
dtype, device);
save_p_counts = core::Tensor::Zeros(
{map_required_point_idx_to_point_idx.GetLength() +
(is_radius_search ? 1 : 0)},
core::Int32, device);

core::Tensor map_fpfh_point_idx_to_required_point_idx =
map_point_idx_to_required_point_idx
.IndexGet({indices_fpfh_points})
.To(core::Int64);

save_p_indices.IndexSet({map_fpfh_point_idx_to_required_point_idx},
p_indices);
save_p_distance2.IndexSet(
{map_fpfh_point_idx_to_required_point_idx}, p_distance2);
save_p_counts.IndexSet({map_fpfh_point_idx_to_required_point_idx},
p_counts);

// If we are filtering FPFH features, we have already computed some
// info about the FPFH points' neighbors. Now we just need to
// compute the info for the remaining required points, so skip the
// computation for the already computed info.
mask_spfh_points =
core::Tensor::Zeros({num_points}, core::Bool, device);
mask_spfh_points.IndexSet(
{map_required_point_idx_to_point_idx},
core::Tensor::Ones({1}, core::Bool, device));
mask_spfh_points.IndexSet(
{indices_fpfh_points},
core::Tensor::Zeros({1}, core::Bool, device));
} else {
mask_spfh_points = mask_required_points;
}
}

const core::Tensor query_point_positions =
mask_required_points.GetShape()[0] > 0
? input.GetPointPositions().IndexGet({mask_required_points})
: input.GetPointPositions();
filter_fpfh ? input.GetPointPositions().IndexGet({mask_spfh_points})
: input.GetPointPositions();

// Compute nearest neighbors and squared distances.
core::Tensor p_indices, p_distance2, p_counts;
@@ -119,14 +202,25 @@
utility::LogError("Building KnnIndex failed.");
}
}
std::tie(p_indices, p_distance2) =
tree.KnnSearch(query_point_positions, max_nn.value());

// Make counts full with min(max_nn, num_points).
const int fill_value =
max_nn.value() > num_points ? num_points : max_nn.value();
p_counts = core::Tensor::Full({query_point_positions.GetLength()},
fill_value, core::Int32, device);

// tree.KnnSearch complains if the query point cloud is empty.
if (query_point_positions.GetLength() > 0) {
std::tie(p_indices, p_distance2) =
tree.KnnSearch(query_point_positions, max_nn.value());

const int fill_value =
max_nn.value() > num_points ? num_points : max_nn.value();

p_counts = core::Tensor::Full({query_point_positions.GetLength()},
fill_value, core::Int32, device);
} else {
p_indices = core::Tensor::Zeros({0, max_nn.value()}, core::Int32,
device);
p_distance2 =
core::Tensor::Zeros({0, max_nn.value()}, dtype, device);
p_counts = core::Tensor::Zeros({0}, core::Int32, device);
}

utility::LogDebug(
"Use KNNSearch [max_nn: {}] for computing FPFH feature.",
max_nn.value());
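Two details in the KNN branch above are worth calling out: the filtered query set may be empty (KnnSearch rejects an empty query cloud, so the search is skipped and empty result tensors are returned), and the per-query neighbor count is filled with min(max_nn, num_points), since KNN cannot return more neighbors than there are points. A small stand-in with std containers (hypothetical helper, not the core::Tensor API):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// Build the per-query counts vector used by the KNN branch, handling the
// empty-query case that would otherwise be passed to KnnSearch.
std::vector<std::int32_t> MakeKnnCounts(std::int64_t num_queries,
                                        std::int64_t num_points,
                                        int max_nn) {
    if (num_queries == 0) {
        return {};  // caller skips the KNN search entirely
    }
    const auto fill_value = static_cast<std::int32_t>(
            std::min<std::int64_t>(max_nn, num_points));
    return std::vector<std::int32_t>(static_cast<std::size_t>(num_queries),
                                     fill_value);
}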
@@ -147,18 +241,33 @@
}

core::Tensor fpfh;
if (indices.has_value()) {
const auto mask_fpfh_points_indices =
mask_fpfh_points.NonZero().GetItem({core::TensorKey::Index(0)});
const auto map_batch_info_idx_to_point_idx =
mask_required_points.NonZero().GetItem(
{core::TensorKey::Index(0)});
fpfh = core::Tensor::Zeros({mask_fpfh_points_indices.GetLength(), 33},
dtype, device);
if (filter_fpfh) {
const int64_t size = indices_fpfh_points.GetLength();
fpfh = core::Tensor::Zeros({size, 33}, dtype, device);
core::Tensor final_p_indices, final_p_distance2, final_p_counts;
if (cache_fpfh_info) {
core::Tensor map_spfh_idx_to_required_point_idx =
map_point_idx_to_required_point_idx
.IndexGet({mask_spfh_points})
.To(core::Int64);
save_p_indices.IndexSet({map_spfh_idx_to_required_point_idx},
p_indices);
save_p_distance2.IndexSet({map_spfh_idx_to_required_point_idx},
p_distance2);
save_p_counts.IndexSet({map_spfh_idx_to_required_point_idx},
p_counts);
final_p_indices = save_p_indices;
final_p_distance2 = save_p_distance2;
final_p_counts = save_p_counts;
} else {
final_p_indices = p_indices;
final_p_distance2 = p_distance2;
final_p_counts = p_counts;
}
pipelines::kernel::ComputeFPFHFeature(
input.GetPointPositions(), input.GetPointNormals(), p_indices,
p_distance2, p_counts, fpfh, mask_fpfh_points,
map_batch_info_idx_to_point_idx);
input.GetPointPositions(), input.GetPointNormals(),
final_p_indices, final_p_distance2, final_p_counts, fpfh,
mask_fpfh_points, map_required_point_idx_to_point_idx);
} else {
const int64_t size = input.GetPointPositions().GetLength();
fpfh = core::Tensor::Zeros({size, 33}, dtype, device);
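The final merge step above scatters the neighbor info just computed for the remaining SPFH points into the cached tensors at the positions given by the point-to-required-point map, so the kernel receives one consistent set of indices, distances, and counts. The real code does this with core::Tensor::IndexSet; a plain-vector sketch of the same scatter (hypothetical helper):

#include <cstddef>
#include <cstdint>
#include <vector>

// Write each newly computed row into the cached storage at its mapped
// position, mirroring save_p_indices.IndexSet(map, p_indices) above.
void ScatterRows(const std::vector<std::int64_t> &target_positions,
                 const std::vector<std::vector<std::int32_t>> &new_rows,
                 std::vector<std::vector<std::int32_t>> &cached_rows) {
    for (std::size_t i = 0; i < target_positions.size(); ++i) {
        cached_rows.at(static_cast<std::size_t>(target_positions[i])) =
                new_rows[i];
    }
}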
