From 7dfcacafa29801b97712bc74d503951eefe65d2e Mon Sep 17 00:00:00 2001 From: Kenzo Lobos-Tsunekawa Date: Tue, 3 Dec 2024 11:46:15 +0900 Subject: [PATCH 01/19] feat: implemented a cuda accelerated ogm Signed-off-by: Kenzo Lobos-Tsunekawa --- .../CMakeLists.txt | 25 +- .../costmap_2d/occupancy_grid_map.hpp | 9 +- .../costmap_2d/occupancy_grid_map_base.hpp | 63 +- .../costmap_2d/occupancy_grid_map_fixed.hpp | 16 +- .../occupancy_grid_map_fixed_kernel.hpp | 60 ++ .../occupancy_grid_map_projective.hpp | 21 +- .../occupancy_grid_map_projective_kernel.hpp | 70 +++ .../updater/binary_bayes_filter_updater.hpp | 10 +- .../binary_bayes_filter_updater_kernel.hpp | 38 ++ .../updater/log_odds_bayes_filter_updater.hpp | 10 +- .../log_odds_bayes_filter_updater_kernel.hpp | 36 ++ .../updater/ogm_updater_interface.hpp | 12 +- .../utils/cuda_pointcloud.hpp | 53 ++ .../utils/utils.hpp | 50 +- .../utils/utils_kernel.hpp | 51 ++ .../lib/costmap_2d/occupancy_grid_map.cpp | 53 +- .../costmap_2d/occupancy_grid_map_base.cpp | 190 +++---- .../costmap_2d/occupancy_grid_map_fixed.cpp | 281 +++------ .../occupancy_grid_map_fixed_kernel.cu | 383 +++++++++++++ .../occupancy_grid_map_projective.cpp | 364 ++++-------- .../occupancy_grid_map_projective_kernel.cu | 538 ++++++++++++++++++ .../updater/binary_bayes_filter_updater.cpp | 55 +- .../binary_bayes_filter_updater_kernel.cu | 81 +++ .../updater/log_odds_bayes_filter_updater.cpp | 34 +- .../log_odds_bayes_filter_updater_kernel.cu | 93 +++ .../lib/utils/utils.cpp | 79 +-- .../lib/utils/utils_kernel.cu | 295 ++++++++++ .../package.xml | 1 + .../synchronized_grid_map_fusion_node.cpp | 42 +- .../synchronized_grid_map_fusion_node.hpp | 5 +- ...aserscan_based_occupancy_grid_map_node.cpp | 4 +- ...intcloud_based_occupancy_grid_map_node.cpp | 60 +- ...intcloud_based_occupancy_grid_map_node.hpp | 6 +- 33 files changed, 2211 insertions(+), 877 deletions(-) create mode 100644 perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_fixed_kernel.hpp create mode 100644 perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_projective_kernel.hpp create mode 100644 perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/binary_bayes_filter_updater_kernel.hpp create mode 100644 perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/log_odds_bayes_filter_updater_kernel.hpp create mode 100644 perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp create mode 100644 perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/utils_kernel.hpp create mode 100644 perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed_kernel.cu create mode 100644 perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_projective_kernel.cu create mode 100644 perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater_kernel.cu create mode 100644 perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu create mode 100644 perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils_kernel.cu diff --git a/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt b/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt index 28342ecb5bcff..3275b2db7a68f 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt +++ b/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt @@ -4,27 +4,49 @@ project(autoware_probabilistic_occupancy_grid_map) find_package(autoware_cmake REQUIRED) autoware_package() +find_package(CUDA REQUIRED) find_package(eigen3_cmake_module REQUIRED) find_package(Eigen3 REQUIRED) find_package(PCL REQUIRED) include_directories( + include SYSTEM + ${CUDA_INCLUDE_DIRS} ${EIGEN3_INCLUDE_DIR} + ${grid_map_ros_INCLUDE_DIRS} +) + +if(NOT ${CUDA_FOUND}) + message(WARNING "cuda was not found, so the autoware_probabilistic_occupancy_grid_map package will not be built.") + return() +endif() + +list(APPEND CUDA_NVCC_FLAGS --expt-relaxed-constexpr -diag-suppress 20012 --compiler-options -fPIC) +set(CUDA_SEPARABLE_COMPILATION ON) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +cuda_add_library(${PROJECT_NAME}_cuda SHARED + lib/costmap_2d/occupancy_grid_map_fixed_kernel.cu + lib/costmap_2d/occupancy_grid_map_projective_kernel.cu + lib/updater/binary_bayes_filter_updater_kernel.cu + lib/updater/log_odds_bayes_filter_updater_kernel.cu + lib/utils/utils_kernel.cu ) ament_auto_add_library(${PROJECT_NAME}_common SHARED + lib/costmap_2d/occupancy_grid_map_base.cpp lib/updater/binary_bayes_filter_updater.cpp lib/utils/utils.cpp ) target_link_libraries(${PROJECT_NAME}_common ${PCL_LIBRARIES} + ${PROJECT_NAME}_cuda ) # PointcloudBasedOccupancyGridMap ament_auto_add_library(pointcloud_based_occupancy_grid_map SHARED src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.cpp - lib/costmap_2d/occupancy_grid_map_base.cpp lib/costmap_2d/occupancy_grid_map_fixed.cpp lib/costmap_2d/occupancy_grid_map_projective.cpp ) @@ -32,6 +54,7 @@ ament_auto_add_library(pointcloud_based_occupancy_grid_map SHARED target_link_libraries(pointcloud_based_occupancy_grid_map ${PCL_LIBRARIES} ${PROJECT_NAME}_common + ${PROJECT_NAME}_cuda ) rclcpp_components_register_node(pointcloud_based_occupancy_grid_map diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map.hpp index 93e768c0f6b4e..5fe3613cc7612 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map.hpp @@ -52,7 +52,8 @@ #ifndef AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__COSTMAP_2D__OCCUPANCY_GRID_MAP_HPP_ #define AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__COSTMAP_2D__OCCUPANCY_GRID_MAP_HPP_ -#include +#include "autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_base.hpp" + #include #include @@ -66,7 +67,7 @@ namespace costmap_2d using geometry_msgs::msg::Pose; using sensor_msgs::msg::PointCloud2; -class OccupancyGridMap : public nav2_costmap_2d::Costmap2D +class OccupancyGridMap : public OccupancyGridMapInterface { public: OccupancyGridMap( @@ -78,8 +79,6 @@ class OccupancyGridMap : public nav2_costmap_2d::Costmap2D void updateOccupiedCells(const PointCloud2 & pointcloud); - void updateOrigin(double new_origin_x, double new_origin_y) override; - private: void raytraceFreespace(const PointCloud2 & pointcloud, const Pose & robot_pose); @@ -87,6 +86,8 @@ class OccupancyGridMap : public nav2_costmap_2d::Costmap2D bool worldToMap(double wx, double wy, unsigned int & mx, unsigned int & my) const; + void initRosParam([[maybe_unused]] rclcpp::Node & node) override {} + rclcpp::Logger logger_{rclcpp::get_logger("laserscan_based_occupancy_grid_map")}; rclcpp::Clock clock_{RCL_ROS_TIME}; }; diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_base.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_base.hpp index 09ad0a4d554a1..8272dda967114 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_base.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_base.hpp @@ -52,6 +52,9 @@ #ifndef AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__COSTMAP_2D__OCCUPANCY_GRID_MAP_BASE_HPP_ #define AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__COSTMAP_2D__OCCUPANCY_GRID_MAP_BASE_HPP_ +#include "autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp" + +#include #include #include #include @@ -72,18 +75,17 @@ class OccupancyGridMapInterface : public nav2_costmap_2d::Costmap2D { public: OccupancyGridMapInterface( - const unsigned int cells_size_x, const unsigned int cells_size_y, const float resolution); + const bool use_cuda, const unsigned int cells_size_x, const unsigned int cells_size_y, + const float resolution); virtual void updateWithPointCloud( - const PointCloud2 & raw_pointcloud, const PointCloud2 & obstacle_pointcloud, - const Pose & robot_pose, const Pose & scan_origin) = 0; + [[maybe_unused]] const CudaPointCloud2 & raw_pointcloud, + [[maybe_unused]] const CudaPointCloud2 & obstacle_pointcloud, + [[maybe_unused]] const Pose & robot_pose, [[maybe_unused]] const Pose & scan_origin) {}; void updateOrigin(double new_origin_x, double new_origin_y) override; - void raytrace( - const double source_x, const double source_y, const double target_x, const double target_y, - const unsigned char cost); - void setCellValue(const double wx, const double wy, const unsigned char cost); - using nav2_costmap_2d::Costmap2D::resetMaps; + + void resetMaps() override; virtual void initRosParam(rclcpp::Node & node) = 0; @@ -92,47 +94,32 @@ class OccupancyGridMapInterface : public nav2_costmap_2d::Costmap2D double min_height_; double max_height_; - void setFieldOffsets(const PointCloud2 & input_raw, const PointCloud2 & input_obstacle); - - int x_offset_raw_; - int y_offset_raw_; - int z_offset_raw_; - int x_offset_obstacle_; - int y_offset_obstacle_; - int z_offset_obstacle_; - bool offset_initialized_; - const double min_angle_ = autoware::universe_utils::deg2rad(-180.0); const double max_angle_ = autoware::universe_utils::deg2rad(180.0); const double angle_increment_inv_ = 1.0 / autoware::universe_utils::deg2rad(0.1); Eigen::Matrix4f mat_map_, mat_scan_; - bool isPointValid(const Eigen::Vector4f & pt) const - { - // Apply height filter and exclude invalid points - return min_height_ < pt[2] && pt[2] < max_height_ && std::isfinite(pt[0]) && - std::isfinite(pt[1]) && std::isfinite(pt[2]); - } - // Transform pt to (pt_map, pt_scan), then calculate angle_bin_index and range - void transformPointAndCalculate( - const Eigen::Vector4f & pt, Eigen::Vector4f & pt_map, int & angle_bin_index, - double & range) const - { - pt_map = mat_map_ * pt; - Eigen::Vector4f pt_scan(mat_scan_ * pt_map); - const double angle = atan2(pt_scan[1], pt_scan[0]); - angle_bin_index = (angle - min_angle_) * angle_increment_inv_; - range = std::sqrt(pt_scan[1] * pt_scan[1] + pt_scan[0] * pt_scan[0]); - } - -private: - bool worldToMap(double wx, double wy, unsigned int & mx, unsigned int & my) const; + bool isCudaEnabled() const; + const autoware::cuda_utils::CudaUniquePtr & getDeviceCostmap() const; + +protected: rclcpp::Logger logger_{rclcpp::get_logger("pointcloud_based_occupancy_grid_map")}; rclcpp::Clock clock_{RCL_ROS_TIME}; double resolution_inv_; + + cudaStream_t stream_; + + bool use_cuda_; + autoware::cuda_utils::CudaUniquePtr device_costmap_; + autoware::cuda_utils::CudaUniquePtr device_costmap_aux_; + + autoware::cuda_utils::CudaUniquePtr device_rotation_map_; + autoware::cuda_utils::CudaUniquePtr device_translation_map_; + autoware::cuda_utils::CudaUniquePtr device_rotation_scan_; + autoware::cuda_utils::CudaUniquePtr device_translation_scan_; }; } // namespace costmap_2d diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_fixed.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_fixed.hpp index 3301fd1987bd3..320d297e0a4d1 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_fixed.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_fixed.hpp @@ -16,6 +16,7 @@ #define AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__COSTMAP_2D__OCCUPANCY_GRID_MAP_FIXED_HPP_ #include "autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_base.hpp" +#include "autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp" namespace autoware::occupancy_grid_map { @@ -28,21 +29,20 @@ class OccupancyGridMapFixedBlindSpot : public OccupancyGridMapInterface { public: OccupancyGridMapFixedBlindSpot( - const unsigned int cells_size_x, const unsigned int cells_size_y, const float resolution); + const bool use_cuda, const unsigned int cells_size_x, const unsigned int cells_size_y, + const float resolution); void updateWithPointCloud( - const PointCloud2 & raw_pointcloud, const PointCloud2 & obstacle_pointcloud, + const CudaPointCloud2 & raw_pointcloud, const CudaPointCloud2 & obstacle_pointcloud, const Pose & robot_pose, const Pose & scan_origin) override; - using OccupancyGridMapInterface::raytrace; - using OccupancyGridMapInterface::setCellValue; - using OccupancyGridMapInterface::setFieldOffsets; - using OccupancyGridMapInterface::updateOrigin; - void initRosParam(rclcpp::Node & node) override; -private: +protected: double distance_margin_; + + autoware::cuda_utils::CudaUniquePtr raw_points_tensor_; + autoware::cuda_utils::CudaUniquePtr obstacle_points_tensor_; }; } // namespace costmap_2d diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_fixed_kernel.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_fixed_kernel.hpp new file mode 100644 index 0000000000000..d87c8eec718e1 --- /dev/null +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_fixed_kernel.hpp @@ -0,0 +1,60 @@ +// Copyright 2021 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__COSTMAP_2D__OCCUPANCY_GRID_MAP_FIXED_KERNEL_HPP_ +#define AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__COSTMAP_2D__OCCUPANCY_GRID_MAP_FIXED_KERNEL_HPP_ + +#include "autoware/probabilistic_occupancy_grid_map/utils/utils_kernel.hpp" + +#include + +#include + +namespace autoware::occupancy_grid_map +{ +namespace costmap_2d::map_fixed +{ + +void prepareTensorLaunch( + const float * input_pointcloud, const std::size_t num_points, const std::size_t points_step, + const std::size_t angle_bins, const std::size_t range_bins, const float min_height, + const float max_height, const float min_angle, const float angle_increment_inv, + const float range_resolution_inv, const Eigen::Matrix3f * rotation_map, + const Eigen::Vector3f * translation_map, const Eigen::Matrix3f * rotation_scan, + const Eigen::Vector3f * translation_scan, std::uint64_t * points_tensor, cudaStream_t stream); + +void fillEmptySpaceLaunch( + const std::uint64_t * points_tensor, const std::size_t angle_bins, const std::size_t range_bins, + const float map_resolution_inv, const float scan_origin_x, const float scan_origin_y, + const float map_origin_x, const float map_origin_y, const int num_cells_x, const int num_cells_y, + std::uint8_t empty_value, std::uint8_t * costmap_tensor, cudaStream_t stream); + +void fillUnknownSpaceLaunch( + const std::uint64_t * raw_points_tensor, const std::uint64_t * obstacle_points_tensor, + const float distance_margin, const std::size_t angle_bins, const std::size_t range_bins, + const float map_resolution_inv, const float scan_origin_x, const float scan_origin_y, + const float map_origin_x, const float map_origin_y, const int num_cells_x, const int num_cells_y, + std::uint8_t free_space_value, std::uint8_t no_information_value, std::uint8_t * costmap_tensor, + cudaStream_t stream); + +void fillObstaclesLaunch( + const std::uint64_t * points_tensor, const float distance_margin, const std::size_t angle_bins, + const std::size_t range_bins, const float map_resolution_inv, const float map_origin_x, + const float map_origin_y, const int num_cells_x, const int num_cells_y, + std::uint8_t obstacle_value, std::uint8_t * costmap_tensor, cudaStream_t stream); + +} // namespace costmap_2d::map_fixed +} // namespace autoware::occupancy_grid_map + +#endif // AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__COSTMAP_2D__OCCUPANCY_GRID_MAP_FIXED_KERNEL_HPP_ diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_projective.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_projective.hpp index 7569a60b36466..9b23b9ecf8cb6 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_projective.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_projective.hpp @@ -32,25 +32,22 @@ class OccupancyGridMapProjectiveBlindSpot : public OccupancyGridMapInterface { public: OccupancyGridMapProjectiveBlindSpot( - const unsigned int cells_size_x, const unsigned int cells_size_y, const float resolution); + const bool use_cuda, const unsigned int cells_size_x, const unsigned int cells_size_y, + const float resolution); void updateWithPointCloud( - const PointCloud2 & raw_pointcloud, const PointCloud2 & obstacle_pointcloud, + const CudaPointCloud2 & raw_pointcloud, const CudaPointCloud2 & obstacle_pointcloud, const Pose & robot_pose, const Pose & scan_origin) override; - using OccupancyGridMapInterface::raytrace; - using OccupancyGridMapInterface::setCellValue; - using OccupancyGridMapInterface::setFieldOffsets; - using OccupancyGridMapInterface::updateOrigin; - void initRosParam(rclcpp::Node & node) override; private: - double projection_dz_threshold_; - double obstacle_separation_threshold_; - bool pub_debug_grid_; - grid_map::GridMap debug_grid_; - rclcpp::Publisher::SharedPtr debug_grid_map_publisher_ptr_; + float projection_dz_threshold_; + float obstacle_separation_threshold_; + + autoware::cuda_utils::CudaUniquePtr raw_points_tensor_; + autoware::cuda_utils::CudaUniquePtr obstacle_points_tensor_; + autoware::cuda_utils::CudaUniquePtr device_translation_scan_origin_; }; } // namespace costmap_2d diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_projective_kernel.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_projective_kernel.hpp new file mode 100644 index 0000000000000..916cf6ed9ec0a --- /dev/null +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_projective_kernel.hpp @@ -0,0 +1,70 @@ +// Copyright 2021 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__COSTMAP_2D__OCCUPANCY_GRID_MAP_PROJECTIVE_KERNEL_HPP_ +#define AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__COSTMAP_2D__OCCUPANCY_GRID_MAP_PROJECTIVE_KERNEL_HPP_ + +#include "autoware/probabilistic_occupancy_grid_map/utils/utils_kernel.hpp" + +#include + +#include + +namespace autoware::occupancy_grid_map +{ +namespace costmap_2d::map_projective +{ + +void prepareRawTensorLaunch( + const float * input_pointcloud, const std::size_t num_points, const std::size_t points_step, + const std::size_t angle_bins, const std::size_t range_bins, const float min_height, + const float max_height, const float min_angle, const float angle_increment_inv, + const float range_resolution_inv, const Eigen::Matrix3f * rotation_map, + const Eigen::Vector3f * translation_map, const Eigen::Matrix3f * rotation_scan, + const Eigen::Vector3f * translation_scan, std::uint64_t * points_tensor, cudaStream_t stream); + +void prepareObstacleTensorLaunch( + const float * input_pointcloud, const std::size_t num_points, const std::size_t points_step, + const std::size_t angle_bins, const std::size_t range_bins, const float min_height, + const float max_height, const float min_angle, const float angle_increment_inv, + const float range_resolution_inv, const float projection_dz_threshold, + const Eigen::Vector3f * translation_scan_origin, const Eigen::Matrix3f * rotation_map, + const Eigen::Vector3f * translation_map, const Eigen::Matrix3f * rotation_scan, + const Eigen::Vector3f * translation_scan, std::uint64_t * points_tensor, cudaStream_t stream); + +void fillEmptySpaceLaunch( + const std::uint64_t * points_tensor, const std::size_t angle_bins, const std::size_t range_bins, + const float map_resolution_inv, const float scan_origin_x, const float scan_origin_y, + const float map_origin_x, const float map_origin_y, const int num_cells_x, const int num_cells_y, + std::uint8_t empty_value, std::uint8_t * costmap_tensor, cudaStream_t stream); + +void fillUnknownSpaceLaunch( + const std::uint64_t * raw_points_tensor, const std::uint64_t * obstacle_points_tensor, + const float obstacle_separation_threshold, const std::size_t angle_bins, + const std::size_t range_bins, const float map_resolution_inv, const float scan_origin_x, + const float scan_origin_y, const float scan_origin_z, const float map_origin_x, + const float map_origin_y, const float robot_pose_z, const int num_cells_x, const int num_cells_y, + std::uint8_t free_space_value, std::uint8_t no_information_value, std::uint8_t * costmap_tensor, + cudaStream_t stream); + +void fillObstaclesLaunch( + const std::uint64_t * points_tensor, const float distance_margin, const std::size_t angle_bins, + const std::size_t range_bins, const float map_resolution_inv, const float map_origin_x, + const float map_origin_y, const int num_cells_x, const int num_cells_y, + std::uint8_t obstacle_value, std::uint8_t * costmap_tensor, cudaStream_t stream); + +} // namespace costmap_2d::map_projective +} // namespace autoware::occupancy_grid_map + +#endif // AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__COSTMAP_2D__OCCUPANCY_GRID_MAP_PROJECTIVE_KERNEL_HPP_ diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/binary_bayes_filter_updater.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/binary_bayes_filter_updater.hpp index af28b7962b3bc..fe5739f2cadc7 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/binary_bayes_filter_updater.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/binary_bayes_filter_updater.hpp @@ -15,6 +15,7 @@ #ifndef AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__UPDATER__BINARY_BAYES_FILTER_UPDATER_HPP_ #define AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__UPDATER__BINARY_BAYES_FILTER_UPDATER_HPP_ +#include "autoware/cuda_utils/cuda_unique_ptr.hpp" #include "autoware/probabilistic_occupancy_grid_map/updater/ogm_updater_interface.hpp" #include @@ -27,16 +28,19 @@ namespace costmap_2d class OccupancyGridMapBBFUpdater : public OccupancyGridMapUpdaterInterface { public: - enum Index : size_t { OCCUPIED = 0U, FREE = 1U }; + enum Index : size_t { OCCUPIED = 0U, FREE = 1U, NUM_STATES = 2U }; OccupancyGridMapBBFUpdater( - const unsigned int cells_size_x, const unsigned int cells_size_y, const float resolution); - bool update(const Costmap2D & single_frame_occupancy_grid_map) override; + const bool use_cuda, const unsigned int cells_size_x, const unsigned int cells_size_y, + const float resolution); + bool update(const OccupancyGridMapInterface & single_frame_occupancy_grid_map) override; void initRosParam(rclcpp::Node & node) override; private: inline unsigned char applyBBF(const unsigned char & z, const unsigned char & o); Eigen::Matrix2f probability_matrix_; double v_ratio_; + + autoware::cuda_utils::CudaUniquePtr device_probability_matrix_; }; } // namespace costmap_2d diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/binary_bayes_filter_updater_kernel.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/binary_bayes_filter_updater_kernel.hpp new file mode 100644 index 0000000000000..78056c604a427 --- /dev/null +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/binary_bayes_filter_updater_kernel.hpp @@ -0,0 +1,38 @@ +// Copyright 2021 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__UPDATER__BINARY_BAYES_FILTER_UPDATER_KERNEL_HPP_ +#define AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__UPDATER__BINARY_BAYES_FILTER_UPDATER_KERNEL_HPP_ + +#include "autoware/cuda_utils/cuda_unique_ptr.hpp" + +#include + +#include + +namespace autoware::occupancy_grid_map +{ +namespace costmap_2d +{ + +void applyBBFLaunch( + const std::uint8_t * z_costmap, const float * probability_matrix, const int num_states, + const int free_index, const int occupied_index, const std::uint8_t free_space_value, + const std::uint8_t lethal_ostacle_value, const std::uint8_t no_information_value, + const double v_ratio_, const int num_elements, std::uint8_t * o_costmap, cudaStream_t stream); + +} // namespace costmap_2d +} // namespace autoware::occupancy_grid_map + +#endif // AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__UPDATER__BINARY_BAYES_FILTER_UPDATER_KERNEL_HPP_ diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/log_odds_bayes_filter_updater.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/log_odds_bayes_filter_updater.hpp index d7bb1184c06d6..d24fc7c46331d 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/log_odds_bayes_filter_updater.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/log_odds_bayes_filter_updater.hpp @@ -32,18 +32,18 @@ namespace costmap_2d class OccupancyGridMapLOBFUpdater : public OccupancyGridMapUpdaterInterface { public: - enum Index : size_t { OCCUPIED = 0U, FREE = 1U }; + enum Index : size_t { OCCUPIED = 0U, FREE = 1U, NUM_STATES = 2U }; OccupancyGridMapLOBFUpdater( - const unsigned int cells_size_x, const unsigned int cells_size_y, const float resolution) - : OccupancyGridMapUpdaterInterface(cells_size_x, cells_size_y, resolution) + const bool use_cuda, const unsigned int cells_size_x, const unsigned int cells_size_y, + const float resolution) + : OccupancyGridMapUpdaterInterface(use_cuda, cells_size_x, cells_size_y, resolution) { } - bool update(const Costmap2D & single_frame_occupancy_grid_map) override; + bool update(const OccupancyGridMapInterface & single_frame_occupancy_grid_map) override; void initRosParam(rclcpp::Node & node) override; private: inline unsigned char applyLOBF(const unsigned char & z, const unsigned char & o); - Eigen::Matrix2f probability_matrix_; }; } // namespace costmap_2d diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/log_odds_bayes_filter_updater_kernel.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/log_odds_bayes_filter_updater_kernel.hpp new file mode 100644 index 0000000000000..d61cf4f241662 --- /dev/null +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/log_odds_bayes_filter_updater_kernel.hpp @@ -0,0 +1,36 @@ +// Copyright 2021 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__UPDATER__LOG_ODDS_BAYES_FILTER_UPDATER_KERNEL_HPP_ +#define AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__UPDATER__LOG_ODDS_BAYES_FILTER_UPDATER_KERNEL_HPP_ + +#include "autoware/cuda_utils/cuda_unique_ptr.hpp" + +#include + +#include + +namespace autoware::occupancy_grid_map +{ +namespace costmap_2d +{ + +void applyLOBFLaunch( + const std::uint8_t * z_costmap, const std::uint8_t no_information_value, const int num_elements, + std::uint8_t * o_costmap, cudaStream_t stream); + +} // namespace costmap_2d +} // namespace autoware::occupancy_grid_map + +#endif // AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__UPDATER__LOG_ODDS_BAYES_FILTER_UPDATER_KERNEL_HPP_ diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/ogm_updater_interface.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/ogm_updater_interface.hpp index 75231089ac33c..ae9f824bb9beb 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/ogm_updater_interface.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/ogm_updater_interface.hpp @@ -16,24 +16,26 @@ #define AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__UPDATER__OGM_UPDATER_INTERFACE_HPP_ #include "autoware/probabilistic_occupancy_grid_map/cost_value/cost_value.hpp" +#include "autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_base.hpp" +#include "autoware/probabilistic_occupancy_grid_map/utils/utils_kernel.hpp" -#include #include namespace autoware::occupancy_grid_map { namespace costmap_2d { -class OccupancyGridMapUpdaterInterface : public nav2_costmap_2d::Costmap2D +class OccupancyGridMapUpdaterInterface : public OccupancyGridMapInterface { public: OccupancyGridMapUpdaterInterface( - const unsigned int cells_size_x, const unsigned int cells_size_y, const float resolution) - : Costmap2D(cells_size_x, cells_size_y, resolution, 0.f, 0.f, cost_value::NO_INFORMATION) + bool use_cuda, const unsigned int cells_size_x, const unsigned int cells_size_y, + const float resolution) + : OccupancyGridMapInterface(use_cuda, cells_size_x, cells_size_y, resolution) { } virtual ~OccupancyGridMapUpdaterInterface() = default; - virtual bool update(const Costmap2D & single_frame_occupancy_grid_map) = 0; + virtual bool update(const OccupancyGridMapInterface & single_frame_occupancy_grid_map) = 0; virtual void initRosParam(rclcpp::Node & node) = 0; }; diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp new file mode 100644 index 0000000000000..247787d341ed6 --- /dev/null +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp @@ -0,0 +1,53 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__UTILS__CUDA_POINTCLOUD_HPP_ +#define AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__UTILS__CUDA_POINTCLOUD_HPP_ + +#include + +#include + +#include + +class CudaPointCloud2 : public sensor_msgs::msg::PointCloud2 +{ +public: + void fromROSMsg(const sensor_msgs::msg::PointCloud2 & msg) + { + // NOTE(knzo25): replace this with the cuda blackboard later + header = msg.header; + fields = msg.fields; + height = msg.height; + width = msg.width; + is_bigendian = msg.is_bigendian; + point_step = msg.point_step; + row_step = msg.row_step; + is_dense = msg.is_dense; + + if (!data || capacity_ < msg.data.size()) { + capacity_ = 1024 * (msg.data.size() / 1024 + 1); + data = autoware::cuda_utils::make_unique(capacity_); + } + + cudaMemcpy(data.get(), msg.data.data(), msg.data.size(), cudaMemcpyHostToDevice); + } + + autoware::cuda_utils::CudaUniquePtr data; + +private: + std::size_t capacity_{0}; +}; + +#endif // AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__UTILS__CUDA_POINTCLOUD_HPP_ diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/utils.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/utils.hpp index 41c7294ded849..d8e8451d8788f 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/utils.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/utils.hpp @@ -16,6 +16,8 @@ #define AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__UTILS__UTILS_HPP_ #include "autoware/probabilistic_occupancy_grid_map/cost_value/cost_value.hpp" +#include "autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp" +#include "autoware/probabilistic_occupancy_grid_map/utils/utils_kernel.hpp" #include #include @@ -50,52 +52,12 @@ namespace autoware::occupancy_grid_map namespace utils { -/** - * @brief 3D point struct for sorting and searching - * - */ -struct MyPoint3d -{ - float x; - float y; - float z; - - // constructor - MyPoint3d(float x, float y, float z) : x(x), y(y), z(z) {} - - // calc squared norm - float norm2() const { return powf(x, 2) + powf(y, 2) + powf(z, 2); } - - // calc arctan2 - float arctan2() const { return atan2f(y, x); } - - // overload operator< - bool operator<(const MyPoint3d & other) const - { - const auto a = norm2(); - const auto b = other.norm2(); - if (a == b) { // escape when norm2 is same - return arctan2() < other.arctan2(); - } else { - return a < b; - } - } - - // overload operator== - bool operator==(const MyPoint3d & other) const - { - return fabsf(x - other.x) < FLT_EPSILON && fabsf(y - other.y) < FLT_EPSILON && - fabsf(z - other.z) < FLT_EPSILON; - } -}; - bool transformPointcloud( const sensor_msgs::msg::PointCloud2 & input, const tf2_ros::Buffer & tf2, const std::string & target_frame, sensor_msgs::msg::PointCloud2 & output); -void transformPointcloud( - const sensor_msgs::msg::PointCloud2 & input, const geometry_msgs::msg::Pose & pose, - sensor_msgs::msg::PointCloud2 & output); +bool transformPointcloud( + CudaPointCloud2 & input, const tf2_ros::Buffer & tf2, const std::string & target_frame); Eigen::Matrix4f getTransformMatrix(const geometry_msgs::msg::Pose & pose); @@ -116,10 +78,6 @@ geometry_msgs::msg::Pose getPose( // get inverted pose geometry_msgs::msg::Pose getInversePose(const geometry_msgs::msg::Pose & pose); -bool extractCommonPointCloud( - const sensor_msgs::msg::PointCloud2 & obstacle_pc, const sensor_msgs::msg::PointCloud2 & raw_pc, - sensor_msgs::msg::PointCloud2 & output_obstacle_pc); - } // namespace utils } // namespace autoware::occupancy_grid_map diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/utils_kernel.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/utils_kernel.hpp new file mode 100644 index 0000000000000..c74424f900b91 --- /dev/null +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/utils_kernel.hpp @@ -0,0 +1,51 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__UTILS__UTILS_KERNEL_HPP_ +#define AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__UTILS__UTILS_KERNEL_HPP_ + +#include + +#include + +#include + +namespace autoware::occupancy_grid_map +{ +namespace utils +{ + +void __device__ setCellValue( + float wx, float wy, float origin_x, float origin_y, float resolution_inv, int size_x, int size_y, + std::uint8_t value, std::uint8_t * costmap_tensor); + +void __device__ raytrace( + const float source_x, const float source_y, const float target_x, const float target_y, + const float origin_x, float origin_y, const float resolution_inv, const int size_x, + const int size_y, const std::uint8_t cost, std::uint8_t * costmap_tensor); + +void copyMapRegionLaunch( + const std::uint8_t * source_map, unsigned int sm_lower_left_x, unsigned int sm_lower_left_y, + unsigned int sm_size_x, std::uint8_t * dest_map, unsigned int dm_lower_left_x, + unsigned int dm_lower_left_y, unsigned int dm_size_x, unsigned int region_size_x, + unsigned int region_size_y, cudaStream_t stream); + +void transformPointCloudLaunch( + std::uint8_t * points, std::size_t num_points, std::size_t points_step, + const Eigen::Matrix3f & rotation, const Eigen::Vector3f & translation); + +} // namespace utils +} // namespace autoware::occupancy_grid_map + +#endif // AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__UTILS__UTILS_KERNEL_HPP_ diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map.cpp b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map.cpp index 25eafcd564e2d..082985ca824af 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map.cpp @@ -65,7 +65,7 @@ using sensor_msgs::PointCloud2ConstIterator; OccupancyGridMap::OccupancyGridMap( const unsigned int cells_size_x, const unsigned int cells_size_y, const float resolution) -: Costmap2D(cells_size_x, cells_size_y, resolution, 0.f, 0.f, cost_value::NO_INFORMATION) +: OccupancyGridMapInterface(false, cells_size_x, cells_size_y, resolution) { } @@ -85,57 +85,6 @@ bool OccupancyGridMap::worldToMap(double wx, double wy, unsigned int & mx, unsig return false; } -void OccupancyGridMap::updateOrigin(double new_origin_x, double new_origin_y) -{ - // project the new origin into the grid - int cell_ox{static_cast(std::floor((new_origin_x - origin_x_) / resolution_))}; - int cell_oy{static_cast(std::floor((new_origin_y - origin_y_) / resolution_))}; - - // compute the associated world coordinates for the origin cell - // because we want to keep things grid-aligned - double new_grid_ox{origin_x_ + cell_ox * resolution_}; - double new_grid_oy{origin_y_ + cell_oy * resolution_}; - - // To save casting from unsigned int to int a bunch of times - int size_x{static_cast(size_x_)}; - int size_y{static_cast(size_y_)}; - - // we need to compute the overlap of the new and existing windows - int lower_left_x{std::min(std::max(cell_ox, 0), size_x)}; - int lower_left_y{std::min(std::max(cell_oy, 0), size_y)}; - int upper_right_x{std::min(std::max(cell_ox + size_x, 0), size_x)}; - int upper_right_y{std::min(std::max(cell_oy + size_y, 0), size_y)}; - - unsigned int cell_size_x = upper_right_x - lower_left_x; - unsigned int cell_size_y = upper_right_y - lower_left_y; - - // we need a map to store the obstacles in the window temporarily - unsigned char * local_map = new unsigned char[cell_size_x * cell_size_y]; - - // copy the local window in the costmap to the local map - copyMapRegion( - costmap_, lower_left_x, lower_left_y, size_x_, local_map, 0, 0, cell_size_x, cell_size_x, - cell_size_y); - - // now we'll set the costmap to be completely unknown if we track unknown space - resetMaps(); - - // update the origin with the appropriate world coordinates - origin_x_ = new_grid_ox; - origin_y_ = new_grid_oy; - - // compute the starting cell location for copying data back in - int start_x{lower_left_x - cell_ox}; - int start_y{lower_left_y - cell_oy}; - - // now we want to copy the overlapping information back into the map, but in its new location - copyMapRegion( - local_map, 0, 0, cell_size_x, costmap_, start_x, start_y, size_x_, cell_size_x, cell_size_y); - - // make sure to clean up - delete[] local_map; -} - void OccupancyGridMap::raytrace2D(const PointCloud2 & pointcloud, const Pose & robot_pose) { // freespace diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp index 39855ec36260c..1b242f22691cf 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp @@ -1,4 +1,4 @@ -// Copyright 2021 Tier IV, Inc. +// Copyright 2024 Tier IV, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -77,34 +77,35 @@ namespace costmap_2d using sensor_msgs::PointCloud2ConstIterator; OccupancyGridMapInterface::OccupancyGridMapInterface( - const unsigned int cells_size_x, const unsigned int cells_size_y, const float resolution) -: Costmap2D(cells_size_x, cells_size_y, resolution, 0.f, 0.f, cost_value::NO_INFORMATION) + const bool use_cuda, const unsigned int cells_size_x, const unsigned int cells_size_y, + const float resolution) +: Costmap2D(cells_size_x, cells_size_y, resolution, 0.f, 0.f, cost_value::NO_INFORMATION), + use_cuda_(use_cuda) { - min_height_ = -std::numeric_limits::infinity(); - max_height_ = std::numeric_limits::infinity(); - resolution_inv_ = 1.0 / resolution_; - offset_initialized_ = false; -} - -inline bool OccupancyGridMapInterface::worldToMap( - double wx, double wy, unsigned int & mx, unsigned int & my) const -{ - if (wx < origin_x_ || wy < origin_y_) { - return false; - } - - mx = static_cast(std::floor((wx - origin_x_) * resolution_inv_)); - my = static_cast(std::floor((wy - origin_y_) * resolution_inv_)); - - if (mx < size_x_ && my < size_y_) { - return true; + if (use_cuda_) { + min_height_ = -std::numeric_limits::infinity(); + max_height_ = std::numeric_limits::infinity(); + resolution_inv_ = 1.0 / resolution_; + + const auto num_cells_x = this->getSizeInCellsX(); + const auto num_cells_y = this->getSizeInCellsY(); + + cudaStreamCreate(&stream_); + device_costmap_ = autoware::cuda_utils::make_unique(num_cells_x * num_cells_y); + device_costmap_aux_ = + autoware::cuda_utils::make_unique(num_cells_x * num_cells_y); + + device_rotation_map_ = autoware::cuda_utils::make_unique(); + device_translation_map_ = autoware::cuda_utils::make_unique(); + device_rotation_scan_ = autoware::cuda_utils::make_unique(); + device_translation_scan_ = autoware::cuda_utils::make_unique(); } - - return false; } void OccupancyGridMapInterface::updateOrigin(double new_origin_x, double new_origin_y) { + using autoware::occupancy_grid_map::utils::copyMapRegionLaunch; + // project the new origin into the grid int cell_ox{static_cast(std::floor((new_origin_x - origin_x_) / resolution_))}; int cell_oy{static_cast(std::floor((new_origin_y - origin_y_) / resolution_))}; @@ -128,15 +129,26 @@ void OccupancyGridMapInterface::updateOrigin(double new_origin_x, double new_ori unsigned int cell_size_y = upper_right_y - lower_left_y; // we need a map to store the obstacles in the window temporarily - unsigned char * local_map = new unsigned char[cell_size_x * cell_size_y]; + unsigned char * local_map{nullptr}; - // copy the local window in the costmap to the local map - copyMapRegion( - costmap_, lower_left_x, lower_left_y, size_x_, local_map, 0, 0, cell_size_x, cell_size_x, - cell_size_y); + if (use_cuda_) { + copyMapRegionLaunch( + device_costmap_.get(), lower_left_x, lower_left_y, size_x_, device_costmap_aux_.get(), 0, 0, + cell_size_x, cell_size_x, cell_size_y, stream_); - // now we'll set the costmap to be completely unknown if we track unknown space - resetMaps(); + cudaMemset( + device_costmap_.get(), cost_value::NO_INFORMATION, size_x_ * size_y_ * sizeof(std::uint8_t)); + } else { + local_map = new unsigned char[cell_size_x * cell_size_y]; + + // copy the local window in the costmap to the local map + copyMapRegion( + costmap_, lower_left_x, lower_left_y, size_x_, local_map, 0, 0, cell_size_x, cell_size_x, + cell_size_y); + + // now we'll set the costmap to be completely unknown if we track unknown space + nav2_costmap_2d::Costmap2D::resetMaps(); + } // update the origin with the appropriate world coordinates origin_x_ = new_grid_ox; @@ -147,93 +159,30 @@ void OccupancyGridMapInterface::updateOrigin(double new_origin_x, double new_ori int start_y{lower_left_y - cell_oy}; // now we want to copy the overlapping information back into the map, but in its new location - copyMapRegion( - local_map, 0, 0, cell_size_x, costmap_, start_x, start_y, size_x_, cell_size_x, cell_size_y); - - // make sure to clean up - delete[] local_map; -} - -void OccupancyGridMapInterface::setCellValue( - const double wx, const double wy, const unsigned char cost) -{ - MarkCell marker(costmap_, cost); - unsigned int mx{}; - unsigned int my{}; - if (!worldToMap(wx, wy, mx, my)) { - RCLCPP_DEBUG(logger_, "Computing map coords failed"); - return; + if (use_cuda_) { + copyMapRegionLaunch( + device_costmap_aux_.get(), 0, 0, cell_size_x, device_costmap_.get(), start_x, start_y, + size_x_, cell_size_x, cell_size_y, stream_); + } else { + copyMapRegion( + local_map, 0, 0, cell_size_x, costmap_, start_x, start_y, size_x_, cell_size_x, cell_size_y); + + // make sure to clean up + if (local_map != nullptr) { + delete[] local_map; + } } - const unsigned int index = getIndex(mx, my); - marker(index); } -void OccupancyGridMapInterface::raytrace( - const double source_x, const double source_y, const double target_x, const double target_y, - const unsigned char cost) +void OccupancyGridMapInterface::resetMaps() { - unsigned int x0{}; - unsigned int y0{}; - const double ox{source_x}; - const double oy{source_y}; - if (!worldToMap(ox, oy, x0, y0)) { - RCLCPP_DEBUG( - logger_, - "The origin for the sensor at (%.2f, %.2f) is out of map bounds. So, the costmap cannot " - "raytrace for it.", - ox, oy); - return; + if (use_cuda_) { + cudaMemsetAsync( + device_costmap_.get(), cost_value::NO_INFORMATION, getSizeInCellsX() * getSizeInCellsY(), + stream_); + } else { + nav2_costmap_2d::Costmap2D::resetMaps(); } - - // we can pre-compute the endpoints of the map outside of the inner loop... we'll need these later - const double origin_x = origin_x_, origin_y = origin_y_; - const double map_end_x = origin_x + size_x_ * resolution_; - const double map_end_y = origin_y + size_y_ * resolution_; - - double wx = target_x; - double wy = target_y; - - // now we also need to make sure that the endpoint we're ray-tracing - // to isn't off the costmap and scale if necessary - const double a = wx - ox; - const double b = wy - oy; - - // the minimum value to raytrace from is the origin - if (wx < origin_x) { - const double t = (origin_x - ox) / a; - wx = origin_x; - wy = oy + b * t; - } - if (wy < origin_y) { - const double t = (origin_y - oy) / b; - wx = ox + a * t; - wy = origin_y; - } - - // the maximum value to raytrace to is the end of the map - if (wx > map_end_x) { - const double t = (map_end_x - ox) / a; - wx = map_end_x - .001; - wy = oy + b * t; - } - if (wy > map_end_y) { - const double t = (map_end_y - oy) / b; - wx = ox + a * t; - wy = map_end_y - .001; - } - - // now that the vector is scaled correctly... we'll get the map coordinates of its endpoint - unsigned int x1{}; - unsigned int y1{}; - - // check for legality just in case - if (!worldToMap(wx, wy, x1, y1)) { - return; - } - - constexpr unsigned int cell_raytrace_range = 10000; // large number to ignore range threshold - MarkCell marker(costmap_, cost); - raytraceLine(marker, x0, y0, x1, y1, cell_raytrace_range); } void OccupancyGridMapInterface::setHeightLimit(const double min_height, const double max_height) @@ -242,16 +191,15 @@ void OccupancyGridMapInterface::setHeightLimit(const double min_height, const do max_height_ = max_height; } -void OccupancyGridMapInterface::setFieldOffsets( - const PointCloud2 & input_raw, const PointCloud2 & input_obstacle) +bool OccupancyGridMapInterface::isCudaEnabled() const +{ + return use_cuda_; +} + +const autoware::cuda_utils::CudaUniquePtr & +OccupancyGridMapInterface::getDeviceCostmap() const { - x_offset_raw_ = input_raw.fields[pcl::getFieldIndex(input_raw, "x")].offset; - y_offset_raw_ = input_raw.fields[pcl::getFieldIndex(input_raw, "y")].offset; - z_offset_raw_ = input_raw.fields[pcl::getFieldIndex(input_raw, "z")].offset; - x_offset_obstacle_ = input_obstacle.fields[pcl::getFieldIndex(input_obstacle, "x")].offset; - y_offset_obstacle_ = input_obstacle.fields[pcl::getFieldIndex(input_obstacle, "y")].offset; - z_offset_obstacle_ = input_obstacle.fields[pcl::getFieldIndex(input_obstacle, "z")].offset; - offset_initialized_ = true; + return device_costmap_; } } // namespace costmap_2d diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed.cpp b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed.cpp index da22ef125fb19..53c96ec5503cd 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed.cpp @@ -1,4 +1,4 @@ -// Copyright 2021 Tier IV, Inc. +// Copyright 2024 Tier IV, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,8 +15,11 @@ #include "autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_fixed.hpp" #include "autoware/probabilistic_occupancy_grid_map/cost_value/cost_value.hpp" +#include "autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_fixed_kernel.hpp" #include "autoware/probabilistic_occupancy_grid_map/utils/utils.hpp" +#include "autoware/probabilistic_occupancy_grid_map/utils/utils_kernel.hpp" +#include #include #include #include @@ -41,9 +44,24 @@ namespace costmap_2d using sensor_msgs::PointCloud2ConstIterator; OccupancyGridMapFixedBlindSpot::OccupancyGridMapFixedBlindSpot( - const unsigned int cells_size_x, const unsigned int cells_size_y, const float resolution) -: OccupancyGridMapInterface(cells_size_x, cells_size_y, resolution) + const bool use_cuda, const unsigned int cells_size_x, const unsigned int cells_size_y, + const float resolution) +: OccupancyGridMapInterface(use_cuda, cells_size_x, cells_size_y, resolution) { + if (use_cuda_) { + const size_t angle_bin_size = + ((max_angle_ - min_angle_) * angle_increment_inv_) + size_t(1 /*margin*/); + + const auto num_cells_x = this->getSizeInCellsX(); + const auto num_cells_y = this->getSizeInCellsY(); + const std::size_t range_bin_size = + static_cast(std::sqrt(2) * std::max(num_cells_x, num_cells_y) / 2.0) + 1; + + raw_points_tensor_ = + autoware::cuda_utils::make_unique(2 * angle_bin_size * range_bin_size); + obstacle_points_tensor_ = + autoware::cuda_utils::make_unique(2 * angle_bin_size * range_bin_size); + } } /** @@ -55,7 +73,7 @@ OccupancyGridMapFixedBlindSpot::OccupancyGridMapFixedBlindSpot( * @param scan_origin manually chosen grid map origin frame */ void OccupancyGridMapFixedBlindSpot::updateWithPointCloud( - const PointCloud2 & raw_pointcloud, const PointCloud2 & obstacle_pointcloud, + const CudaPointCloud2 & raw_pointcloud, const CudaPointCloud2 & obstacle_pointcloud, const Pose & robot_pose, const Pose & scan_origin) { const size_t angle_bin_size = @@ -69,190 +87,77 @@ void OccupancyGridMapFixedBlindSpot::updateWithPointCloud( // Transform Matrix from map frame to scan frame mat_scan_ = utils::getTransformMatrix(scan2map_pose); - if (!offset_initialized_) { - setFieldOffsets(raw_pointcloud, obstacle_pointcloud); - } - - // Create angle bins and sort by distance - struct BinInfo - { - BinInfo() = default; - BinInfo(const double _range, const double _wx, const double _wy) - : range(_range), wx(_wx), wy(_wy) - { - } - double range{}; - double wx{}; - double wy{}; - }; - - std::vector> obstacle_pointcloud_angle_bins(angle_bin_size); - std::vector> raw_pointcloud_angle_bins(angle_bin_size); - - const size_t raw_pointcloud_size = raw_pointcloud.width * raw_pointcloud.height; - const size_t obstacle_pointcloud_size = obstacle_pointcloud.width * obstacle_pointcloud.height; - - // Reserve a certain amount of memory in advance for performance reasons - const size_t raw_reserve_size = raw_pointcloud_size / angle_bin_size; - const size_t obstacle_reserve_size = obstacle_pointcloud_size / angle_bin_size; - for (size_t i = 0; i < angle_bin_size; i++) { - raw_pointcloud_angle_bins[i].reserve(raw_reserve_size); - obstacle_pointcloud_angle_bins[i].reserve(obstacle_reserve_size); - } - - // Updated every loop inside transformPointAndCalculate() - Eigen::Vector4f pt_map; - int angle_bin_index; - double range; - - size_t global_offset = 0; - for (size_t i = 0; i < raw_pointcloud_size; ++i) { - Eigen::Vector4f pt( - *reinterpret_cast(&raw_pointcloud.data[global_offset + x_offset_raw_]), - *reinterpret_cast(&raw_pointcloud.data[global_offset + y_offset_raw_]), - *reinterpret_cast(&raw_pointcloud.data[global_offset + z_offset_raw_]), 1); - global_offset += raw_pointcloud.point_step; - if (!isPointValid(pt)) { - continue; - } - transformPointAndCalculate(pt, pt_map, angle_bin_index, range); - - raw_pointcloud_angle_bins.at(angle_bin_index).emplace_back(range, pt_map[0], pt_map[1]); - } - - for (auto & raw_pointcloud_angle_bin : raw_pointcloud_angle_bins) { - std::sort(raw_pointcloud_angle_bin.begin(), raw_pointcloud_angle_bin.end(), [](auto a, auto b) { - return a.range < b.range; - }); - } - - // Create obstacle angle bins and sort points by range - global_offset = 0; - for (size_t i = 0; i < obstacle_pointcloud_size; ++i) { - Eigen::Vector4f pt( - *reinterpret_cast( - &obstacle_pointcloud.data[global_offset + x_offset_obstacle_]), - *reinterpret_cast( - &obstacle_pointcloud.data[global_offset + y_offset_obstacle_]), - *reinterpret_cast( - &obstacle_pointcloud.data[global_offset + z_offset_obstacle_]), - 1); - global_offset += obstacle_pointcloud.point_step; - if (!isPointValid(pt)) { - continue; - } - transformPointAndCalculate(pt, pt_map, angle_bin_index, range); - - // Ignore obstacle points exceed the range of the raw points - // No raw point in this angle bin, or obstacle point exceeds the range of the raw points - if ( - raw_pointcloud_angle_bins.at(angle_bin_index).empty() || - range > raw_pointcloud_angle_bins.at(angle_bin_index).back().range) { - continue; - } - obstacle_pointcloud_angle_bins.at(angle_bin_index).emplace_back(range, pt_map[0], pt_map[1]); - } - - for (auto & obstacle_pointcloud_angle_bin : obstacle_pointcloud_angle_bins) { - std::sort( - obstacle_pointcloud_angle_bin.begin(), obstacle_pointcloud_angle_bin.end(), - [](auto a, auto b) { return a.range < b.range; }); - } - - // First step: Initialize cells to the final point with freespace - for (size_t bin_index = 0; bin_index < obstacle_pointcloud_angle_bins.size(); ++bin_index) { - const auto & raw_pointcloud_angle_bin = raw_pointcloud_angle_bins.at(bin_index); - - BinInfo end_distance; - if (raw_pointcloud_angle_bin.empty()) { - continue; - } else { - end_distance = raw_pointcloud_angle_bin.back(); - } - raytrace( - scan_origin.position.x, scan_origin.position.y, end_distance.wx, end_distance.wy, - cost_value::FREE_SPACE); - } - - // Second step: Add unknown cell - for (size_t bin_index = 0; bin_index < obstacle_pointcloud_angle_bins.size(); ++bin_index) { - const auto & obstacle_pointcloud_angle_bin = obstacle_pointcloud_angle_bins.at(bin_index); - const auto & raw_pointcloud_angle_bin = raw_pointcloud_angle_bins.at(bin_index); - auto raw_distance_iter = raw_pointcloud_angle_bin.begin(); - for (size_t dist_index = 0; dist_index < obstacle_pointcloud_angle_bin.size(); ++dist_index) { - // Calculate next raw point from obstacle point - while (raw_distance_iter != raw_pointcloud_angle_bin.end()) { - if ( - raw_distance_iter->range < - obstacle_pointcloud_angle_bin.at(dist_index).range + distance_margin_) - raw_distance_iter++; - else - break; - } - - // There is no point far than the obstacle point. - const bool no_freespace_point = (raw_distance_iter == raw_pointcloud_angle_bin.end()); - - if (dist_index + 1 == obstacle_pointcloud_angle_bin.size()) { - const auto & source = obstacle_pointcloud_angle_bin.at(dist_index); - if (!no_freespace_point) { - const auto & target = *raw_distance_iter; - raytrace(source.wx, source.wy, target.wx, target.wy, cost_value::NO_INFORMATION); - setCellValue(target.wx, target.wy, cost_value::FREE_SPACE); - } - continue; - } - - auto next_obstacle_point_distance = std::abs( - obstacle_pointcloud_angle_bin.at(dist_index + 1).range - - obstacle_pointcloud_angle_bin.at(dist_index).range); - if (next_obstacle_point_distance <= distance_margin_) { - continue; - } else if (no_freespace_point) { - const auto & source = obstacle_pointcloud_angle_bin.at(dist_index); - const auto & target = obstacle_pointcloud_angle_bin.at(dist_index + 1); - raytrace(source.wx, source.wy, target.wx, target.wy, cost_value::NO_INFORMATION); - continue; - } - - auto next_raw_distance = - std::abs(obstacle_pointcloud_angle_bin.at(dist_index).range - raw_distance_iter->range); - if (next_raw_distance < next_obstacle_point_distance) { - const auto & source = obstacle_pointcloud_angle_bin.at(dist_index); - const auto & target = *raw_distance_iter; - raytrace(source.wx, source.wy, target.wx, target.wy, cost_value::NO_INFORMATION); - setCellValue(target.wx, target.wy, cost_value::FREE_SPACE); - continue; - } else { - const auto & source = obstacle_pointcloud_angle_bin.at(dist_index); - const auto & target = obstacle_pointcloud_angle_bin.at(dist_index + 1); - raytrace(source.wx, source.wy, target.wx, target.wy, cost_value::NO_INFORMATION); - continue; - } - } - } - - // Third step: Overwrite occupied cell - for (const auto & obstacle_pointcloud_angle_bin : obstacle_pointcloud_angle_bins) { - for (size_t dist_index = 0; dist_index < obstacle_pointcloud_angle_bin.size(); ++dist_index) { - const auto & obstacle_point = obstacle_pointcloud_angle_bin.at(dist_index); - setCellValue(obstacle_point.wx, obstacle_point.wy, cost_value::LETHAL_OBSTACLE); - - if (dist_index + 1 == obstacle_pointcloud_angle_bin.size()) { - continue; - } - - auto next_obstacle_point_distance = std::abs( - obstacle_pointcloud_angle_bin.at(dist_index + 1).range - - obstacle_pointcloud_angle_bin.at(dist_index).range); - if (next_obstacle_point_distance <= distance_margin_) { - const auto & source = obstacle_pointcloud_angle_bin.at(dist_index); - const auto & target = obstacle_pointcloud_angle_bin.at(dist_index + 1); - raytrace(source.wx, source.wy, target.wx, target.wy, cost_value::LETHAL_OBSTACLE); - continue; - } - } - } + const auto map_res = this->getResolution(); + const auto num_cells_x = this->getSizeInCellsX(); + const auto num_cells_y = this->getSizeInCellsY(); + const std::size_t range_bin_size = + static_cast(std::sqrt(2) * std::max(num_cells_x, num_cells_y) / 2.0) + 1; + + cudaMemsetAsync( + raw_points_tensor_.get(), 0xFF, 2 * angle_bin_size * range_bin_size * sizeof(std::int64_t), + stream_); + cudaMemsetAsync( + obstacle_points_tensor_.get(), 0xFF, 2 * angle_bin_size * range_bin_size * sizeof(std::int64_t), + stream_); + cudaMemsetAsync( + device_costmap_.get(), cost_value::NO_INFORMATION, + num_cells_x * num_cells_y * sizeof(std::uint8_t), stream_); + + Eigen::Matrix3f rotation_map = mat_map_.block<3, 3>(0, 0); + Eigen::Vector3f translation_map = mat_map_.block<3, 1>(0, 3); + + Eigen::Matrix3f rotation_scan = mat_scan_.block<3, 3>(0, 0); + Eigen::Vector3f translation_scan = mat_scan_.block<3, 1>(0, 3); + + cudaMemcpyAsync( + device_rotation_map_.get(), &rotation_map, sizeof(Eigen::Matrix3f), cudaMemcpyHostToDevice, + stream_); + cudaMemcpyAsync( + device_translation_map_.get(), &translation_map, sizeof(Eigen::Vector3f), + cudaMemcpyHostToDevice, stream_); + cudaMemcpyAsync( + device_rotation_scan_.get(), &rotation_scan, sizeof(Eigen::Matrix3f), cudaMemcpyHostToDevice, + stream_); + cudaMemcpyAsync( + device_translation_scan_.get(), &translation_scan, sizeof(Eigen::Vector3f), + cudaMemcpyHostToDevice, stream_); + + const std::size_t num_raw_points = raw_pointcloud.width * raw_pointcloud.height; + float range_resolution_inv = 1.0 / map_res; + + map_fixed::prepareTensorLaunch( + reinterpret_cast(raw_pointcloud.data.get()), num_raw_points, + raw_pointcloud.point_step / sizeof(float), angle_bin_size, range_bin_size, min_height_, + max_height_, min_angle_, angle_increment_inv_, range_resolution_inv, device_rotation_map_.get(), + device_translation_map_.get(), device_rotation_scan_.get(), device_translation_scan_.get(), + raw_points_tensor_.get(), stream_); + + const std::size_t num_obstacle_points = obstacle_pointcloud.width * obstacle_pointcloud.height; + + map_fixed::prepareTensorLaunch( + reinterpret_cast(obstacle_pointcloud.data.get()), num_obstacle_points, + obstacle_pointcloud.point_step / sizeof(float), angle_bin_size, range_bin_size, min_height_, + max_height_, min_angle_, angle_increment_inv_, range_resolution_inv, device_rotation_map_.get(), + device_translation_map_.get(), device_rotation_scan_.get(), device_translation_scan_.get(), + obstacle_points_tensor_.get(), stream_); + + map_fixed::fillEmptySpaceLaunch( + raw_points_tensor_.get(), angle_bin_size, range_bin_size, range_resolution_inv, + scan_origin.position.x, scan_origin.position.y, origin_x_, origin_y_, num_cells_x, num_cells_y, + cost_value::FREE_SPACE, device_costmap_.get(), stream_); + + map_fixed::fillUnknownSpaceLaunch( + raw_points_tensor_.get(), obstacle_points_tensor_.get(), distance_margin_, angle_bin_size, + range_bin_size, range_resolution_inv, scan_origin.position.x, scan_origin.position.y, origin_x_, + origin_y_, num_cells_x, num_cells_y, cost_value::FREE_SPACE, cost_value::NO_INFORMATION, + device_costmap_.get(), stream_); + + map_fixed::fillObstaclesLaunch( + obstacle_points_tensor_.get(), distance_margin_, angle_bin_size, range_bin_size, + range_resolution_inv, origin_x_, origin_y_, num_cells_x, num_cells_y, + cost_value::LETHAL_OBSTACLE, device_costmap_.get(), stream_); + + cudaStreamSynchronize(stream_); } void OccupancyGridMapFixedBlindSpot::initRosParam(rclcpp::Node & node) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed_kernel.cu b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed_kernel.cu new file mode 100644 index 0000000000000..6d9be7a97da50 --- /dev/null +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed_kernel.cu @@ -0,0 +1,383 @@ +// Copyright 2024 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_fixed_kernel.hpp" +#include "autoware/probabilistic_occupancy_grid_map/utils/utils_kernel.hpp" + +#include +#include + +#include +#include + +namespace autoware::occupancy_grid_map +{ +namespace costmap_2d::map_fixed +{ + +static constexpr float RANGE_DISCRETIZATION_RESOLUTION = 0.001f; + +__global__ void prepareTensorKernel( + const float * input_pointcloud, const std::size_t num_points, const std::size_t points_step, + const std::size_t angle_bins, const std::size_t range_bins, const float min_height, + const float max_height, const float min_angle, const float angle_increment_inv, + const float range_resolution_inv, const Eigen::Matrix3f * rotation_map, + const Eigen::Vector3f * translation_map, const Eigen::Matrix3f * rotation_scan, + const Eigen::Vector3f * translation_scan, std::uint64_t * points_tensor) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_points) { + return; + } + + Eigen::Map point(input_pointcloud + idx * points_step); + + if ( + point.z() > max_height || point.z() < min_height || !isfinite(point.x()) || + !isfinite(point.y()) || !isfinite(point.z())) { + return; + } + + Eigen::Vector3f map_point = (*rotation_map) * point + (*translation_map); + Eigen::Vector3f scan_point = (*rotation_scan) * map_point + (*translation_scan); + + float angle = atan2(scan_point.y(), scan_point.x()); + int angle_bin_index = static_cast((angle - min_angle) * angle_increment_inv); + float range = sqrt(scan_point.y() * scan_point.y() + scan_point.x() * scan_point.x()); + int range_bin_index = static_cast(range * range_resolution_inv); + + if ( + angle_bin_index < 0 || angle_bin_index >= angle_bins || range_bin_index < 0 || + range_bin_index >= range_bins) { + return; + } + + std::uint64_t range_int = static_cast(range / RANGE_DISCRETIZATION_RESOLUTION); + std::uint32_t world_x_int = __float_as_uint(map_point.x()); + std::uint32_t world_y_int = __float_as_uint(map_point.y()); + + // Can not use std::uint64_t for cuda reasons... + static_assert( + sizeof(std::uint64_t) == sizeof(unsigned long long int), + "unsigned long long int is not 8 bytes"); + unsigned long long int range_and_x_int = (range_int << 32) | world_x_int; + unsigned long long int range_and_y_int = (range_int << 32) | world_y_int; + + std::uint64_t * element_address = + points_tensor + 2 * (angle_bin_index * range_bins + range_bin_index); + + atomicMin(reinterpret_cast(element_address), range_and_x_int); + atomicMin(reinterpret_cast(element_address + 1), range_and_y_int); +} + +__global__ void fillEmptySpaceKernel( + const std::uint64_t * points_tensor, const std::size_t angle_bins, const std::size_t range_bins, + const float map_resolution_inv, const float scan_origin_x, const float scan_origin_y, + const float map_origin_x, const float map_origin_y, const int num_cells_x, const int num_cells_y, + std::uint8_t empty_value, std::uint8_t * costmap_tensor) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= angle_bins * range_bins) return; + + int angle_bin_index = idx / range_bins; + int range_bin_index = idx % range_bins; + + std::uint64_t range_and_x = + points_tensor[2 * (angle_bin_index * range_bins + range_bin_index) + 0]; + std::uint32_t range_int = range_and_x >> 32; + + if (range_int == 0xFFFFFFFF) { + return; + } + + std::uint64_t range_and_y = + points_tensor[2 * (angle_bin_index * range_bins + range_bin_index) + 1]; + float world_x = __uint_as_float(range_and_x & 0xFFFFFFFF); + float world_y = __uint_as_float(range_and_y & 0xFFFFFFFF); + + if (world_x < map_origin_x || world_y < map_origin_y) { + return; + } + + autoware::occupancy_grid_map::utils::raytrace( + scan_origin_x, scan_origin_y, world_x, world_y, map_origin_x, map_origin_y, map_resolution_inv, + num_cells_x, num_cells_y, empty_value, costmap_tensor); +} + +__global__ void fillUnknownSpaceKernel( + const std::uint64_t * raw_points_tensor, const std::uint64_t * obstacle_points_tensor, + const float distance_margin, const std::size_t angle_bins, const std::size_t range_bins, + const float map_resolution_inv, const float scan_origin_x, const float scan_origin_y, + const float map_origin_x, const float map_origin_y, const int num_cells_x, const int num_cells_y, + std::uint8_t free_space_value, std::uint8_t no_information_value, std::uint8_t * costmap_tensor) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= angle_bins * range_bins) return; + + int angle_bin_index = idx / range_bins; + int range_bin_index = idx % range_bins; + + std::uint64_t obs_range_and_x = + obstacle_points_tensor[2 * (angle_bin_index * range_bins + range_bin_index) + 0]; + std::uint64_t obs_range_and_y = + obstacle_points_tensor[2 * (angle_bin_index * range_bins + range_bin_index) + 1]; + std::uint32_t obs_range_int = obs_range_and_x >> 32; + float obs_range = obs_range_int * RANGE_DISCRETIZATION_RESOLUTION; + float obs_world_x = __uint_as_float(obs_range_and_x & 0xFFFFFFFF); + float obs_world_y = __uint_as_float(obs_range_and_y & 0xFFFFFFFF); + + if (obs_range_int == 0xFFFFFFFF || obs_world_x < map_origin_x || obs_world_y < map_origin_y) { + return; + } + + int next_raw_range_bin_index = range_bin_index + 1; + int next_obs_range_bin_index = range_bin_index + 1; + + for (; next_raw_range_bin_index < range_bins; next_raw_range_bin_index++) { + std::uint64_t next_raw_range_and_x = + raw_points_tensor[2 * (angle_bin_index * range_bins + next_raw_range_bin_index) + 0]; + std::uint32_t next_raw_range_int = next_raw_range_and_x >> 32; + float next_raw_range = next_raw_range_int * RANGE_DISCRETIZATION_RESOLUTION; + + if (next_raw_range_int != 0xFFFFFFFF && abs(next_raw_range - obs_range) > distance_margin) { + break; + } + } + + for (; next_obs_range_bin_index < range_bins; next_obs_range_bin_index++) { + std::uint64_t next_obs_range_and_x = + obstacle_points_tensor[2 * (angle_bin_index * range_bins + next_obs_range_bin_index) + 0]; + std::uint32_t next_obs_range_int = next_obs_range_and_x >> 32; + + if (next_obs_range_int != 0xFFFFFFFF) { + break; + } + } + + const std::uint64_t next_obs_range_and_x = + obstacle_points_tensor[2 * (angle_bin_index * range_bins + next_obs_range_bin_index) + 0]; + const std::uint64_t next_obs_range_and_y = + obstacle_points_tensor[2 * (angle_bin_index * range_bins + next_obs_range_bin_index) + 1]; + const std::uint32_t next_obs_range_int = next_obs_range_and_x >> 32; + const float next_obs_world_x = __uint_as_float(next_obs_range_and_x & 0xFFFFFFFF); + const float next_obs_world_y = __uint_as_float(next_obs_range_and_y & 0xFFFFFFFF); + + const std::uint64_t next_raw_range_and_x = + raw_points_tensor[2 * (angle_bin_index * range_bins + next_raw_range_bin_index) + 0]; + const std::uint64_t next_raw_range_and_y = + raw_points_tensor[2 * (angle_bin_index * range_bins + next_raw_range_bin_index) + 1]; + const std::uint32_t next_raw_range_int = next_raw_range_and_x >> 32; + const float next_raw_world_x = __uint_as_float(next_raw_range_and_x & 0xFFFFFFFF); + const float next_raw_world_y = __uint_as_float(next_raw_range_and_y & 0xFFFFFFFF); + + if (next_obs_range_int == 0xFFFFFFFF) { + if ( + next_raw_range_int == 0xFFFFFFFF || next_raw_world_x < map_origin_x || + next_raw_world_y < map_origin_y) { + return; + } + + // if there is no more obstacles after the current one but there are more raw points + // the space between the current obstacle and the next raw point flagged as no_information_value + autoware::occupancy_grid_map::utils::raytrace( + obs_world_x, obs_world_y, next_raw_world_x, next_raw_world_y, map_origin_x, map_origin_y, + map_resolution_inv, num_cells_x, num_cells_y, no_information_value, costmap_tensor); + + autoware::occupancy_grid_map::utils::setCellValue( + next_raw_world_x, next_raw_world_y, map_origin_x, map_origin_y, map_resolution_inv, + num_cells_x, num_cells_y, free_space_value, costmap_tensor); + return; + } + + float next_obs_range = next_obs_range_int * RANGE_DISCRETIZATION_RESOLUTION; + float obs_to_obs_distance = next_obs_range - obs_range; + + if (obs_to_obs_distance <= distance_margin) { + return; + } else if (next_raw_range_int == 0xFFFFFFFF) { + // fill with no information between obstacles + + if (next_obs_world_x < map_origin_x || next_obs_world_y < map_origin_y) { + return; + } + + autoware::occupancy_grid_map::utils::raytrace( + obs_world_x, obs_world_y, next_obs_world_x, next_obs_world_y, map_origin_x, map_origin_y, + map_resolution_inv, num_cells_x, num_cells_y, no_information_value, costmap_tensor); + + return; + } + + float next_raw_range = next_raw_range_int * RANGE_DISCRETIZATION_RESOLUTION; + float raw_to_obs_distance = abs(next_raw_range - obs_range); + + if (raw_to_obs_distance < obs_to_obs_distance) { + // fill with free space between raw and obstacle + + if (next_raw_world_x < map_origin_x || next_raw_world_y < map_origin_y) { + return; + } + + autoware::occupancy_grid_map::utils::raytrace( + obs_world_x, obs_world_y, next_raw_world_x, next_raw_world_y, map_origin_x, map_origin_y, + map_resolution_inv, num_cells_x, num_cells_y, no_information_value, costmap_tensor); + + autoware::occupancy_grid_map::utils::setCellValue( + next_raw_world_x, next_raw_world_y, map_origin_x, map_origin_y, map_resolution_inv, + num_cells_x, num_cells_y, free_space_value, costmap_tensor); + return; + } else { + // fill with no information between obstacles + + if (next_obs_world_x < map_origin_x || next_obs_world_y < map_origin_y) { + return; + } + + autoware::occupancy_grid_map::utils::raytrace( + obs_world_x, obs_world_y, next_obs_world_x, next_obs_world_y, map_origin_x, map_origin_y, + map_resolution_inv, num_cells_x, num_cells_y, no_information_value, costmap_tensor); + + return; + } +} + +__global__ void fillObstaclesKernel( + const std::uint64_t * obstacle_points_tensor, const float distance_margin, + const std::size_t angle_bins, const std::size_t range_bins, const float map_resolution_inv, + const float map_origin_x, const float map_origin_y, const int num_cells_x, const int num_cells_y, + std::uint8_t obstacle_value, std::uint8_t * costmap_tensor) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= angle_bins * range_bins) return; + + int angle_bin_index = idx / range_bins; + int range_bin_index = idx % range_bins; + + std::uint64_t range_and_x = + obstacle_points_tensor[2 * (angle_bin_index * range_bins + range_bin_index) + 0]; + std::uint64_t range_and_y = + obstacle_points_tensor[2 * (angle_bin_index * range_bins + range_bin_index) + 1]; + + std::uint32_t range_int = range_and_x >> 32; + float range = range_int * RANGE_DISCRETIZATION_RESOLUTION; + float world_x = __uint_as_float(range_and_x & 0xFFFFFFFF); + float world_y = __uint_as_float(range_and_y & 0xFFFFFFFF); + + if (range < 0.0 || range_int == 0xFFFFFFFF) { + return; + } + + autoware::occupancy_grid_map::utils::setCellValue( + world_x, world_y, map_origin_x, map_origin_y, map_resolution_inv, num_cells_x, num_cells_y, + obstacle_value, costmap_tensor); + + // Look for the next obstacle point + int next_obs_range_bin_index = range_bin_index + 1; + for (; next_obs_range_bin_index < range_bins; next_obs_range_bin_index++) { + std::uint64_t next_obs_range_and_x = + obstacle_points_tensor[2 * (angle_bin_index * range_bins + next_obs_range_bin_index) + 0]; + std::uint32_t next_obs_range_int = next_obs_range_and_x >> 32; + + if (next_obs_range_int != 0xFFFFFFFF) { + break; + } + } + + std::uint64_t next_obs_range_and_x = + obstacle_points_tensor[2 * (angle_bin_index * range_bins + range_bin_index) + 0]; + std::uint64_t next_obs_range_and_y = + obstacle_points_tensor[2 * (angle_bin_index * range_bins + range_bin_index) + 1]; + + std::uint32_t next_obs_range_int = next_obs_range_and_x >> 32; + float next_obs_range = next_obs_range_int * RANGE_DISCRETIZATION_RESOLUTION; + float next_obs_world_x = __uint_as_float(next_obs_range_and_x & 0xFFFFFFFF); + float next_obs_world_y = __uint_as_float(next_obs_range_and_y & 0xFFFFFFFF); + + if (next_obs_range_int == 0xFFFFFFFF || abs(next_obs_range - range) > distance_margin) { + return; + } + + if (next_obs_world_x < map_origin_x || next_obs_world_y < map_origin_y) { + return; + } + + autoware::occupancy_grid_map::utils::raytrace( + world_x, world_y, next_obs_world_x, next_obs_world_y, map_origin_x, map_origin_y, + map_resolution_inv, num_cells_x, num_cells_y, obstacle_value, costmap_tensor); +} + +void prepareTensorLaunch( + const float * input_pointcloud, const std::size_t num_points, const std::size_t points_step, + const std::size_t angle_bins, const std::size_t range_bins, const float min_height, + const float max_height, const float min_angle, const float angle_increment_inv, + const float range_resolution_inv, const Eigen::Matrix3f * rotation_map, + const Eigen::Vector3f * translation_map, const Eigen::Matrix3f * rotation_scan, + const Eigen::Vector3f * translation_scan, std::uint64_t * points_tensor, cudaStream_t stream) +{ + const int threadsPerBlock = 256; + const int blocksPerGrid = (num_points + threadsPerBlock - 1) / threadsPerBlock; + + prepareTensorKernel<<>>( + input_pointcloud, num_points, points_step, angle_bins, range_bins, min_height, max_height, + min_angle, angle_increment_inv, range_resolution_inv, rotation_map, translation_map, + rotation_scan, translation_scan, points_tensor); +} + +void fillEmptySpaceLaunch( + const std::uint64_t * points_tensor, const std::size_t angle_bins, const std::size_t range_bins, + const float map_resolution_inv, const float scan_origin_x, const float scan_origin_y, + const float map_origin_x, const float map_origin_y, const int num_cells_x, const int num_cells_y, + std::uint8_t empty_value, std::uint8_t * costmap_tensor, cudaStream_t stream) +{ + const int threadsPerBlock = 256; + const int blocksPerGrid = (angle_bins * range_bins + threadsPerBlock - 1) / threadsPerBlock; + + fillEmptySpaceKernel<<>>( + points_tensor, angle_bins, range_bins, map_resolution_inv, scan_origin_x, scan_origin_y, + map_origin_x, map_origin_y, num_cells_x, num_cells_y, empty_value, costmap_tensor); +} + +void fillUnknownSpaceLaunch( + const std::uint64_t * raw_points_tensor, const std::uint64_t * obstacle_points_tensor, + const float distance_margin, const std::size_t angle_bins, const std::size_t range_bins, + const float map_resolution_inv, const float scan_origin_x, const float scan_origin_y, + const float map_origin_x, const float map_origin_y, const int num_cells_x, const int num_cells_y, + std::uint8_t free_space_value, std::uint8_t no_information_value, std::uint8_t * costmap_tensor, + cudaStream_t stream) +{ + const int threadsPerBlock = 256; + const int blocksPerGrid = (angle_bins * range_bins + threadsPerBlock - 1) / threadsPerBlock; + + fillUnknownSpaceKernel<<>>( + raw_points_tensor, obstacle_points_tensor, distance_margin, angle_bins, range_bins, + map_resolution_inv, scan_origin_x, scan_origin_y, map_origin_x, map_origin_y, num_cells_x, + num_cells_y, free_space_value, no_information_value, costmap_tensor); +} + +void fillObstaclesLaunch( + const std::uint64_t * obstacle_points_tensor, const float distance_margin, + const std::size_t angle_bins, const std::size_t range_bins, const float map_resolution_inv, + const float map_origin_x, const float map_origin_y, const int num_cells_x, const int num_cells_y, + std::uint8_t obstacle_value, std::uint8_t * costmap_tensor, cudaStream_t stream) +{ + const int threadsPerBlock = 256; + const int blocksPerGrid = (angle_bins * range_bins + threadsPerBlock - 1) / threadsPerBlock; + + fillObstaclesKernel<<>>( + obstacle_points_tensor, distance_margin, angle_bins, range_bins, map_resolution_inv, + map_origin_x, map_origin_y, num_cells_x, num_cells_y, obstacle_value, costmap_tensor); +} + +} // namespace costmap_2d::map_fixed +} // namespace autoware::occupancy_grid_map diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_projective.cpp b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_projective.cpp index 4f5b8b8eeabed..5bc30872fcf70 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_projective.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_projective.cpp @@ -1,4 +1,4 @@ -// Copyright 2021 Tier IV, Inc. +// Copyright 2024 Tier IV, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,23 +15,17 @@ #include "autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_projective.hpp" #include "autoware/probabilistic_occupancy_grid_map/cost_value/cost_value.hpp" +#include "autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_projective_kernel.hpp" #include "autoware/probabilistic_occupancy_grid_map/utils/utils.hpp" #include #include #include #include - -#include - -#ifdef ROS_DISTRO_GALACTIC -#include -#include -#else #include +#include #include -#endif #include @@ -42,9 +36,25 @@ namespace costmap_2d using sensor_msgs::PointCloud2ConstIterator; OccupancyGridMapProjectiveBlindSpot::OccupancyGridMapProjectiveBlindSpot( - const unsigned int cells_size_x, const unsigned int cells_size_y, const float resolution) -: OccupancyGridMapInterface(cells_size_x, cells_size_y, resolution) + const bool use_cuda, const unsigned int cells_size_x, const unsigned int cells_size_y, + const float resolution) +: OccupancyGridMapInterface(use_cuda, cells_size_x, cells_size_y, resolution) { + if (use_cuda) { + const size_t angle_bin_size = + ((max_angle_ - min_angle_) * angle_increment_inv_) + size_t(1 /*margin*/); + + const auto num_cells_x = this->getSizeInCellsX(); + const auto num_cells_y = this->getSizeInCellsY(); + const std::size_t range_bin_size = + static_cast(std::sqrt(2) * std::max(num_cells_x, num_cells_y) / 2.0) + 1; + + raw_points_tensor_ = + autoware::cuda_utils::make_unique(7 * angle_bin_size * range_bin_size); + obstacle_points_tensor_ = + autoware::cuda_utils::make_unique(7 * angle_bin_size * range_bin_size); + device_translation_scan_origin_ = autoware::cuda_utils::make_unique(); + } } /** @@ -56,7 +66,7 @@ OccupancyGridMapProjectiveBlindSpot::OccupancyGridMapProjectiveBlindSpot( * @param scan_origin manually chosen grid map origin frame */ void OccupancyGridMapProjectiveBlindSpot::updateWithPointCloud( - const PointCloud2 & raw_pointcloud, const PointCloud2 & obstacle_pointcloud, + const CudaPointCloud2 & raw_pointcloud, const CudaPointCloud2 & obstacle_pointcloud, const Pose & robot_pose, const Pose & scan_origin) { const size_t angle_bin_size = @@ -67,262 +77,96 @@ void OccupancyGridMapProjectiveBlindSpot::updateWithPointCloud( const auto scan2map_pose = utils::getInversePose(scan_origin); // scan -> map transform pose - // Transform from map frame to scan frame + // Transform Matrix from map frame to scan frame mat_scan_ = utils::getTransformMatrix(scan2map_pose); - if (!offset_initialized_) { - setFieldOffsets(raw_pointcloud, obstacle_pointcloud); - } - - // Create angle bins and sort points by range - struct BinInfo3D - { - explicit BinInfo3D( - const double _range = 0.0, const double _wx = 0.0, const double _wy = 0.0, - const double _wz = 0.0, const double _projection_length = 0.0, - const double _projected_wx = 0.0, const double _projected_wy = 0.0) - : range(_range), - wx(_wx), - wy(_wy), - wz(_wz), - projection_length(_projection_length), - projected_wx(_projected_wx), - projected_wy(_projected_wy) - { - } - double range; - double wx; - double wy; - double wz; - double projection_length; - double projected_wx; - double projected_wy; - }; - - std::vector> obstacle_pointcloud_angle_bins(angle_bin_size); - std::vector> raw_pointcloud_angle_bins(angle_bin_size); - - const size_t raw_pointcloud_size = raw_pointcloud.width * raw_pointcloud.height; - const size_t obstacle_pointcloud_size = obstacle_pointcloud.width * obstacle_pointcloud.height; - - // Updated every loop inside transformPointAndCalculate() - Eigen::Vector4f pt_map; - int angle_bin_index; - double range; - - size_t global_offset = 0; - for (size_t i = 0; i < raw_pointcloud_size; i++) { - Eigen::Vector4f pt( - *reinterpret_cast(&raw_pointcloud.data[global_offset + x_offset_raw_]), - *reinterpret_cast(&raw_pointcloud.data[global_offset + y_offset_raw_]), - *reinterpret_cast(&raw_pointcloud.data[global_offset + z_offset_raw_]), 1); - global_offset += raw_pointcloud.point_step; - if (!isPointValid(pt)) { - continue; - } - transformPointAndCalculate(pt, pt_map, angle_bin_index, range); - - raw_pointcloud_angle_bins.at(angle_bin_index) - .emplace_back(range, pt_map[0], pt_map[1], pt_map[2]); - } - - for (auto & raw_pointcloud_angle_bin : raw_pointcloud_angle_bins) { - std::sort(raw_pointcloud_angle_bin.begin(), raw_pointcloud_angle_bin.end(), [](auto a, auto b) { - return a.range < b.range; - }); - } - - // Create obstacle angle bins and sort points by range - global_offset = 0; - for (size_t i = 0; i < obstacle_pointcloud_size; i++) { - Eigen::Vector4f pt( - *reinterpret_cast( - &obstacle_pointcloud.data[global_offset + x_offset_obstacle_]), - *reinterpret_cast( - &obstacle_pointcloud.data[global_offset + y_offset_obstacle_]), - *reinterpret_cast( - &obstacle_pointcloud.data[global_offset + z_offset_obstacle_]), - 1); - global_offset += obstacle_pointcloud.point_step; - if (!isPointValid(pt)) { - continue; - } - transformPointAndCalculate(pt, pt_map, angle_bin_index, range); - const double scan_z = scan_origin.position.z - robot_pose.position.z; - const double obstacle_z = (pt_map[2]) - robot_pose.position.z; - const double dz = scan_z - obstacle_z; - - // Ignore obstacle points exceed the range of the raw points - // No raw point in this angle bin, or obstacle point exceeds the range of the raw points - if ( - raw_pointcloud_angle_bins.at(angle_bin_index).empty() || - range > raw_pointcloud_angle_bins.at(angle_bin_index).back().range) { - continue; - } - - if (dz > projection_dz_threshold_) { - const double ratio = obstacle_z / dz; - const double projection_length = range * ratio; - const double projected_wx = (pt_map[0]) + ((pt_map[0]) - scan_origin.position.x) * ratio; - const double projected_wy = (pt_map[1]) + ((pt_map[1]) - scan_origin.position.y) * ratio; - obstacle_pointcloud_angle_bins.at(angle_bin_index) - .emplace_back( - range, pt_map[0], pt_map[1], pt_map[2], projection_length, projected_wx, projected_wy); - } else { - obstacle_pointcloud_angle_bins.at(angle_bin_index) - .emplace_back( - range, pt_map[0], pt_map[1], pt_map[2], std::numeric_limits::infinity(), - std::numeric_limits::infinity(), std::numeric_limits::infinity()); - } - } - - for (auto & obstacle_pointcloud_angle_bin : obstacle_pointcloud_angle_bins) { - std::sort( - obstacle_pointcloud_angle_bin.begin(), obstacle_pointcloud_angle_bin.end(), - [](auto a, auto b) { return a.range < b.range; }); - } - - grid_map::Costmap2DConverter converter; - if (pub_debug_grid_) { - debug_grid_.clearAll(); - debug_grid_.setFrameId("map"); - debug_grid_.setGeometry( - grid_map::Length(size_x_ * resolution_, size_y_ * resolution_), resolution_, - grid_map::Position( - origin_x_ + size_x_ * resolution_ / 2.0, origin_y_ + size_y_ * resolution_ / 2.0)); - } - - auto is_visible_beyond_obstacle = [&](const BinInfo3D & obstacle, const BinInfo3D & raw) -> bool { - if (raw.range < obstacle.range) { - return false; - } - - if (std::isinf(obstacle.projection_length)) { - return false; - } - - // y = ax + b - const double a = -(scan_origin.position.z - robot_pose.position.z) / - (obstacle.range + obstacle.projection_length); - const double b = scan_origin.position.z; - return raw.wz > (a * raw.range + b); - }; - - // First step: Initialize cells to the final point with freespace - for (size_t bin_index = 0; bin_index < obstacle_pointcloud_angle_bins.size(); ++bin_index) { - const auto & raw_pointcloud_angle_bin = raw_pointcloud_angle_bins.at(bin_index); - - BinInfo3D ray_end; - if (raw_pointcloud_angle_bin.empty()) { - continue; - } else { - ray_end = raw_pointcloud_angle_bin.back(); - } - raytrace( - scan_origin.position.x, scan_origin.position.y, ray_end.wx, ray_end.wy, - cost_value::FREE_SPACE); - } - - if (pub_debug_grid_) - converter.addLayerFromCostmap2D(*this, "filled_free_to_farthest", debug_grid_); - - // Second step: Add unknown cell - for (size_t bin_index = 0; bin_index < obstacle_pointcloud_angle_bins.size(); ++bin_index) { - const auto & obstacle_pointcloud_angle_bin = obstacle_pointcloud_angle_bins.at(bin_index); - const auto & raw_pointcloud_angle_bin = raw_pointcloud_angle_bins.at(bin_index); - auto raw_distance_iter = raw_pointcloud_angle_bin.begin(); - for (size_t dist_index = 0; dist_index < obstacle_pointcloud_angle_bin.size(); ++dist_index) { - // Calculate next raw point from obstacle point - const auto & obstacle_bin = obstacle_pointcloud_angle_bin.at(dist_index); - while (raw_distance_iter != raw_pointcloud_angle_bin.end()) { - if (!is_visible_beyond_obstacle(obstacle_bin, *raw_distance_iter)) - raw_distance_iter++; - else - break; - } - - // There is no point farther than the obstacle point. - const bool no_visible_point_beyond = (raw_distance_iter == raw_pointcloud_angle_bin.end()); - if (no_visible_point_beyond) { - const auto & source = obstacle_pointcloud_angle_bin.at(dist_index); - raytrace( - source.wx, source.wy, source.projected_wx, source.projected_wy, - cost_value::NO_INFORMATION); - break; - } - - if (dist_index + 1 == obstacle_pointcloud_angle_bin.size()) { - const auto & source = obstacle_pointcloud_angle_bin.at(dist_index); - raytrace( - source.wx, source.wy, source.projected_wx, source.projected_wy, - cost_value::NO_INFORMATION); - continue; - } - - auto next_obstacle_point_distance = std::abs( - obstacle_pointcloud_angle_bin.at(dist_index + 1).range - - obstacle_pointcloud_angle_bin.at(dist_index).range); - if (next_obstacle_point_distance <= obstacle_separation_threshold_) { - continue; - } - - auto next_raw_distance = - std::abs(obstacle_pointcloud_angle_bin.at(dist_index).range - raw_distance_iter->range); - if (next_raw_distance < next_obstacle_point_distance) { - const auto & source = obstacle_pointcloud_angle_bin.at(dist_index); - const auto & target = *raw_distance_iter; - raytrace(source.wx, source.wy, target.wx, target.wy, cost_value::NO_INFORMATION); - setCellValue(target.wx, target.wy, cost_value::FREE_SPACE); - continue; - } else { - const auto & source = obstacle_pointcloud_angle_bin.at(dist_index); - const auto & target = obstacle_pointcloud_angle_bin.at(dist_index + 1); - raytrace(source.wx, source.wy, target.wx, target.wy, cost_value::NO_INFORMATION); - continue; - } - } - } - - if (pub_debug_grid_) converter.addLayerFromCostmap2D(*this, "added_unknown", debug_grid_); - - // Third step: Overwrite occupied cell - for (const auto & obstacle_pointcloud_angle_bin : obstacle_pointcloud_angle_bins) { - for (size_t dist_index = 0; dist_index < obstacle_pointcloud_angle_bin.size(); ++dist_index) { - const auto & obstacle_point = obstacle_pointcloud_angle_bin.at(dist_index); - setCellValue(obstacle_point.wx, obstacle_point.wy, cost_value::LETHAL_OBSTACLE); - - if (dist_index + 1 == obstacle_pointcloud_angle_bin.size()) { - continue; - } - - auto next_obstacle_point_distance = std::abs( - obstacle_pointcloud_angle_bin.at(dist_index + 1).range - - obstacle_pointcloud_angle_bin.at(dist_index).range); - if (next_obstacle_point_distance <= obstacle_separation_threshold_) { - const auto & source = obstacle_pointcloud_angle_bin.at(dist_index); - const auto & target = obstacle_pointcloud_angle_bin.at(dist_index + 1); - raytrace(source.wx, source.wy, target.wx, target.wy, cost_value::LETHAL_OBSTACLE); - continue; - } - } - } - - if (pub_debug_grid_) converter.addLayerFromCostmap2D(*this, "added_obstacle", debug_grid_); - if (pub_debug_grid_) { - debug_grid_map_publisher_ptr_->publish(grid_map::GridMapRosConverter::toMessage(debug_grid_)); - } + const auto map_res = this->getResolution(); + const auto num_cells_x = this->getSizeInCellsX(); + const auto num_cells_y = this->getSizeInCellsY(); + const std::size_t range_bin_size = + static_cast(std::sqrt(2) * std::max(num_cells_x, num_cells_y) / 2.0) + 1; + + cudaMemsetAsync( + raw_points_tensor_.get(), 0xFF, 6 * angle_bin_size * range_bin_size * sizeof(std::int64_t), + stream_); + cudaMemsetAsync( + obstacle_points_tensor_.get(), 0xFF, 6 * angle_bin_size * range_bin_size * sizeof(std::int64_t), + stream_); + cudaMemsetAsync( + device_costmap_.get(), cost_value::NO_INFORMATION, + num_cells_x * num_cells_y * sizeof(std::uint8_t), stream_); + + Eigen::Matrix3f rotation_map = mat_map_.block<3, 3>(0, 0); + Eigen::Vector3f translation_map = mat_map_.block<3, 1>(0, 3); + + Eigen::Matrix3f rotation_scan = mat_scan_.block<3, 3>(0, 0); + Eigen::Vector3f translation_scan = mat_scan_.block<3, 1>(0, 3); + + Eigen::Vector3f scan_origin_position( + scan_origin.position.x, scan_origin.position.y, scan_origin.position.z); + + cudaMemcpyAsync( + device_rotation_map_.get(), &rotation_map, sizeof(Eigen::Matrix3f), cudaMemcpyHostToDevice, + stream_); + cudaMemcpyAsync( + device_translation_map_.get(), &translation_map, sizeof(Eigen::Vector3f), + cudaMemcpyHostToDevice, stream_); + cudaMemcpyAsync( + device_rotation_scan_.get(), &rotation_scan, sizeof(Eigen::Matrix3f), cudaMemcpyHostToDevice, + stream_); + cudaMemcpyAsync( + device_translation_scan_.get(), &translation_scan, sizeof(Eigen::Vector3f), + cudaMemcpyHostToDevice, stream_); + cudaMemcpyAsync( + device_translation_scan_origin_.get(), &scan_origin_position, sizeof(Eigen::Vector3f), + cudaMemcpyHostToDevice, stream_); + + const std::size_t num_raw_points = raw_pointcloud.width * raw_pointcloud.height; + float range_resolution_inv = 1.0 / map_res; + + map_projective::prepareRawTensorLaunch( + reinterpret_cast(raw_pointcloud.data.get()), num_raw_points, + raw_pointcloud.point_step / sizeof(float), angle_bin_size, range_bin_size, min_height_, + max_height_, min_angle_, angle_increment_inv_, range_resolution_inv, device_rotation_map_.get(), + device_translation_map_.get(), device_rotation_scan_.get(), device_translation_scan_.get(), + raw_points_tensor_.get(), stream_); + + const std::size_t num_obstacle_points = obstacle_pointcloud.width * obstacle_pointcloud.height; + + map_projective::prepareObstacleTensorLaunch( + reinterpret_cast(obstacle_pointcloud.data.get()), num_obstacle_points, + obstacle_pointcloud.point_step / sizeof(float), angle_bin_size, range_bin_size, min_height_, + max_height_, min_angle_, angle_increment_inv_, range_resolution_inv, projection_dz_threshold_, + device_translation_scan_origin_.get(), device_rotation_map_.get(), + device_translation_map_.get(), device_rotation_scan_.get(), device_translation_scan_.get(), + obstacle_points_tensor_.get(), stream_); + + map_projective::fillEmptySpaceLaunch( + raw_points_tensor_.get(), angle_bin_size, range_bin_size, range_resolution_inv, + scan_origin.position.x, scan_origin.position.y, origin_x_, origin_y_, num_cells_x, num_cells_y, + cost_value::FREE_SPACE, device_costmap_.get(), stream_); + + map_projective::fillUnknownSpaceLaunch( + raw_points_tensor_.get(), obstacle_points_tensor_.get(), obstacle_separation_threshold_, + angle_bin_size, range_bin_size, range_resolution_inv, scan_origin.position.x, + scan_origin.position.y, scan_origin.position.z, origin_x_, origin_y_, robot_pose.position.z, + num_cells_x, num_cells_y, cost_value::FREE_SPACE, cost_value::NO_INFORMATION, + device_costmap_.get(), stream_); + + map_projective::fillObstaclesLaunch( + obstacle_points_tensor_.get(), obstacle_separation_threshold_, angle_bin_size, range_bin_size, + range_resolution_inv, origin_x_, origin_y_, num_cells_x, num_cells_y, + cost_value::LETHAL_OBSTACLE, device_costmap_.get(), stream_); + + cudaStreamSynchronize(stream_); } void OccupancyGridMapProjectiveBlindSpot::initRosParam(rclcpp::Node & node) { projection_dz_threshold_ = - node.declare_parameter("OccupancyGridMapProjectiveBlindSpot.projection_dz_threshold"); - obstacle_separation_threshold_ = node.declare_parameter( + node.declare_parameter("OccupancyGridMapProjectiveBlindSpot.projection_dz_threshold"); + obstacle_separation_threshold_ = node.declare_parameter( "OccupancyGridMapProjectiveBlindSpot.obstacle_separation_threshold"); - pub_debug_grid_ = - node.declare_parameter("OccupancyGridMapProjectiveBlindSpot.pub_debug_grid"); - debug_grid_map_publisher_ptr_ = node.create_publisher( - "~/debug/grid_map", rclcpp::QoS(1).durability_volatile()); } } // namespace costmap_2d diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_projective_kernel.cu b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_projective_kernel.cu new file mode 100644 index 0000000000000..d9e5e14b8506d --- /dev/null +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_projective_kernel.cu @@ -0,0 +1,538 @@ +// Copyright 2024 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_projective_kernel.hpp" +#include "autoware/probabilistic_occupancy_grid_map/utils/utils_kernel.hpp" + +#include +#include + +#include +#include +#include + +namespace autoware::occupancy_grid_map +{ +namespace costmap_2d::map_projective +{ + +static constexpr float RANGE_DISCRETIZATION_RESOLUTION = 0.001f; + +bool __device__ isVisibleBeyondObstacle( + const std::uint64_t * obstacle_element, const std::uint64_t * raw_element, + const float & scan_origin_z, const float & robot_pose_z) +{ + std::uint64_t raw_range_and_z = raw_element[2]; + std::uint32_t raw_range_int = raw_range_and_z >> 32; + float raw_range = raw_range_int * RANGE_DISCRETIZATION_RESOLUTION; + float raw_world_z = __uint_as_float(raw_range_and_z & 0xFFFFFFFF); + + std::uint64_t obstacle_range_and_pl = raw_element[3]; + std::uint32_t obstacle_range_int = obstacle_range_and_pl >> 32; + float obstacle_range = obstacle_range_int * RANGE_DISCRETIZATION_RESOLUTION; + float obstacle_pl = __uint_as_float(obstacle_range_and_pl & 0xFFFFFFFF); + + if (raw_range < obstacle_range) { + return false; + } + + if (std::isinf(obstacle_pl)) { + return false; + } + + // y = ax + b + const double a = -(scan_origin_z - robot_pose_z) / (obstacle_range + obstacle_pl); + const double b = scan_origin_z; + return raw_world_z > (a * raw_range + b); +}; + +__global__ void prepareRawTensorKernel( + const float * input_pointcloud, const std::size_t num_points, const std::size_t points_step, + const std::size_t angle_bins, const std::size_t range_bins, const float min_height, + const float max_height, const float min_angle, const float angle_increment_inv, + const float range_resolution_inv, const Eigen::Matrix3f * rotation_map, + const Eigen::Vector3f * translation_map, const Eigen::Matrix3f * rotation_scan, + const Eigen::Vector3f * translation_scan, std::uint64_t * points_tensor) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_points) { + return; + } + + Eigen::Map point(input_pointcloud + idx * points_step); + + if ( + point.z() > max_height || point.z() < min_height || !isfinite(point.x()) || + !isfinite(point.y()) || !isfinite(point.z())) { + return; + } + + Eigen::Vector3f map_point = (*rotation_map) * point + (*translation_map); + Eigen::Vector3f scan_point = (*rotation_scan) * map_point + (*translation_scan); + + float angle = atan2(scan_point.y(), scan_point.x()); + int angle_bin_index = static_cast((angle - min_angle) * angle_increment_inv); + float range = sqrt(scan_point.y() * scan_point.y() + scan_point.x() * scan_point.x()); + int range_bin_index = static_cast(range * range_resolution_inv); + + if ( + angle_bin_index < 0 || angle_bin_index >= angle_bins || range_bin_index < 0 || + range_bin_index >= range_bins) { + return; + } + + std::uint64_t range_int = static_cast(range / RANGE_DISCRETIZATION_RESOLUTION); + std::uint32_t world_x_int = __float_as_uint(map_point.x()); + std::uint32_t world_y_int = __float_as_uint(map_point.y()); + std::uint32_t world_z_int = __float_as_uint(map_point.z()); + std::uint32_t projected_length_int = __float_as_uint(0.f); + std::uint32_t projected_world_x_int = __float_as_uint(0.f); + std::uint32_t projected_world_y_int = __float_as_uint(0.f); + + // Can not use std::uint64_t for cuda reasons... + static_assert( + sizeof(std::uint64_t) == sizeof(unsigned long long int), + "unsigned long long int is not 8 bytes"); + unsigned long long int range_and_x_int = (range_int << 32) | world_x_int; + unsigned long long int range_and_y_int = (range_int << 32) | world_y_int; + unsigned long long int range_and_z_int = (range_int << 32) | world_z_int; + unsigned long long int range_and_pl_int = (range_int << 32) | projected_length_int; + unsigned long long int range_and_px_int = (range_int << 32) | projected_world_x_int; + unsigned long long int range_and_py_int = (range_int << 32) | projected_world_y_int; + + std::uint64_t * element_address = + points_tensor + 6 * (angle_bin_index * range_bins + range_bin_index); + + atomicMin(reinterpret_cast(element_address + 0), range_and_x_int); + atomicMin(reinterpret_cast(element_address + 1), range_and_y_int); + atomicMin(reinterpret_cast(element_address + 2), range_and_z_int); + atomicMin(reinterpret_cast(element_address + 3), range_and_pl_int); + atomicMin(reinterpret_cast(element_address + 4), range_and_px_int); + atomicMin(reinterpret_cast(element_address + 5), range_and_py_int); +} + +__global__ void prepareObstacleTensorKernel( + const float * input_pointcloud, const std::size_t num_points, const std::size_t points_step, + const std::size_t angle_bins, const std::size_t range_bins, const float min_height, + const float max_height, const float min_angle, const float angle_increment_inv, + const float range_resolution_inv, const float projection_dz_threshold, + const Eigen::Vector3f * translation_scan_origin, const Eigen::Matrix3f * rotation_map, + const Eigen::Vector3f * translation_map, const Eigen::Matrix3f * rotation_scan, + const Eigen::Vector3f * translation_scan, std::uint64_t * points_tensor) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_points) { + return; + } + + Eigen::Map point(input_pointcloud + idx * points_step); + + if ( + point.z() > max_height || point.z() < min_height || !isfinite(point.x()) || + !isfinite(point.y()) || !isfinite(point.z())) { + return; + } + + Eigen::Vector3f map_point = (*rotation_map) * point + (*translation_map); + Eigen::Vector3f scan_point = (*rotation_scan) * map_point + (*translation_scan); + + float angle = atan2(scan_point.y(), scan_point.x()); + int angle_bin_index = static_cast((angle - min_angle) * angle_increment_inv); + float range = sqrt(scan_point.y() * scan_point.y() + scan_point.x() * scan_point.x()); + int range_bin_index = static_cast(range * range_resolution_inv); + + if ( + angle_bin_index < 0 || angle_bin_index >= angle_bins || range_bin_index < 0 || + range_bin_index >= range_bins) { + return; + } + + std::uint64_t range_int = static_cast(range / RANGE_DISCRETIZATION_RESOLUTION); + std::uint32_t world_x_int = __float_as_uint(map_point.x()); + std::uint32_t world_y_int = __float_as_uint(map_point.y()); + std::uint32_t world_z_int = __float_as_uint(map_point.z()); + + const float scan_z = translation_scan_origin->z(); + -translation_map->z(); + const float obstacle_z = map_point.z() - translation_scan_origin->z(); + const float dz = scan_z - obstacle_z; + + float projected_length, projected_world_x, projected_world_y; + + if (dz > projection_dz_threshold) { + const float ratio = obstacle_z / dz; + projected_length = range * ratio; + projected_world_x = map_point.x() + (map_point.x() - translation_scan_origin->x()) * ratio; + projected_world_y = map_point.y() + (map_point.y() - translation_scan_origin->y()) * ratio; + } else { + projected_length = INFINITY; + projected_world_x = INFINITY; + projected_world_y = INFINITY; + } + + std::uint32_t projected_length_int = __float_as_uint(projected_length); + std::uint32_t projected_world_x_int = __float_as_uint(projected_world_x); + std::uint32_t projected_world_y_int = __float_as_uint(projected_world_y); + + // Can not use std::uint64_t for cuda reasons... + static_assert( + sizeof(std::uint64_t) == sizeof(unsigned long long int), + "unsigned long long int is not 8 bytes"); + unsigned long long int range_and_x_int = (range_int << 32) | world_x_int; + unsigned long long int range_and_y_int = (range_int << 32) | world_y_int; + unsigned long long int range_and_z_int = (range_int << 32) | world_z_int; + unsigned long long int range_and_pl_int = (range_int << 32) | projected_length_int; + unsigned long long int range_and_px_int = (range_int << 32) | projected_world_x_int; + unsigned long long int range_and_py_int = (range_int << 32) | projected_world_y_int; + + std::uint64_t * element_address = + points_tensor + 6 * (angle_bin_index * range_bins + range_bin_index); + + atomicMin(reinterpret_cast(element_address + 0), range_and_x_int); + atomicMin(reinterpret_cast(element_address + 1), range_and_y_int); + atomicMin(reinterpret_cast(element_address + 2), range_and_z_int); + atomicMin(reinterpret_cast(element_address + 3), range_and_pl_int); + atomicMin(reinterpret_cast(element_address + 4), range_and_px_int); + atomicMin(reinterpret_cast(element_address + 5), range_and_py_int); +} + +__global__ void fillEmptySpaceKernel( + const std::uint64_t * points_tensor, const std::size_t angle_bins, const std::size_t range_bins, + const float map_resolution_inv, const float scan_origin_x, const float scan_origin_y, + const float map_origin_x, const float map_origin_y, const int num_cells_x, const int num_cells_y, + std::uint8_t empty_value, std::uint8_t * costmap_tensor) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= angle_bins * range_bins) return; + + int angle_bin_index = idx / range_bins; + int range_bin_index = idx % range_bins; + + std::uint64_t range_and_x = + points_tensor[6 * (angle_bin_index * range_bins + range_bin_index) + 0]; + std::uint32_t range_int = range_and_x >> 32; + + if (range_int == 0xFFFFFFFF) { + return; + } + + std::uint64_t range_and_y = + points_tensor[6 * (angle_bin_index * range_bins + range_bin_index) + 1]; + float world_x = __uint_as_float(range_and_x & 0xFFFFFFFF); + float world_y = __uint_as_float(range_and_y & 0xFFFFFFFF); + + if (world_x < map_origin_x || world_y < map_origin_y) { + return; + } + + autoware::occupancy_grid_map::utils::raytrace( + scan_origin_x, scan_origin_y, world_x, world_y, map_origin_x, map_origin_y, map_resolution_inv, + num_cells_x, num_cells_y, empty_value, costmap_tensor); +} + +__global__ void fillUnknownSpaceKernel( + const std::uint64_t * raw_points_tensor, const std::uint64_t * obstacle_points_tensor, + const float obstacle_separation_threshold, const std::size_t angle_bins, + const std::size_t range_bins, const float map_resolution_inv, const float scan_origin_x, + const float scan_origin_y, const float scan_origin_z, const float map_origin_x, + const float map_origin_y, const float robot_pose_z, const int num_cells_x, const int num_cells_y, + std::uint8_t free_space_value, std::uint8_t no_information_value, std::uint8_t * costmap_tensor) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= angle_bins * range_bins) return; + + int angle_bin_index = idx / range_bins; + int range_bin_index = idx % range_bins; + + std::uint64_t obs_range_and_x = + obstacle_points_tensor[6 * (angle_bin_index * range_bins + range_bin_index) + 0]; + std::uint64_t obs_range_and_y = + obstacle_points_tensor[6 * (angle_bin_index * range_bins + range_bin_index) + 1]; + std::uint64_t obs_range_and_px = + obstacle_points_tensor[6 * (angle_bin_index * range_bins + range_bin_index) + 4]; + std::uint64_t obs_range_and_py = + obstacle_points_tensor[6 * (angle_bin_index * range_bins + range_bin_index) + 5]; + std::uint32_t obs_range_int = obs_range_and_x >> 32; + float obs_range = obs_range_int * RANGE_DISCRETIZATION_RESOLUTION; + float obs_world_x = __uint_as_float(obs_range_and_x & 0xFFFFFFFF); + float obs_world_y = __uint_as_float(obs_range_and_y & 0xFFFFFFFF); + float obs_world_px = __uint_as_float(obs_range_and_px & 0xFFFFFFFF); + float obs_world_py = __uint_as_float(obs_range_and_py & 0xFFFFFFFF); + + if (obs_range_int == 0xFFFFFFFF || obs_world_x < map_origin_x || obs_world_y < map_origin_y) { + return; + } + + int next_raw_range_bin_index = range_bin_index + 1; + int next_obs_range_bin_index = range_bin_index + 1; + + for (; next_raw_range_bin_index < range_bins; next_raw_range_bin_index++) { + std::uint64_t next_raw_range_and_x = + raw_points_tensor[6 * (angle_bin_index * range_bins + next_raw_range_bin_index) + 0]; + std::uint32_t next_raw_range_int = next_raw_range_and_x >> 32; + + if ( + next_raw_range_int != 0xFFFFFFFF && + isVisibleBeyondObstacle( + obstacle_points_tensor + 6 * (angle_bin_index * range_bins + range_bin_index), + raw_points_tensor + 6 * (angle_bin_index * range_bins + next_raw_range_bin_index), + scan_origin_z, robot_pose_z)) { + break; + } + } + + for (; next_obs_range_bin_index < range_bins; next_obs_range_bin_index++) { + std::uint64_t next_obs_range_and_x = + obstacle_points_tensor[6 * (angle_bin_index * range_bins + next_obs_range_bin_index) + 0]; + std::uint32_t next_obs_range_int = next_obs_range_and_x >> 32; + + if (next_obs_range_int != 0xFFFFFFFF) { + break; + } + } + + const std::uint64_t next_obs_range_and_x = + obstacle_points_tensor[6 * (angle_bin_index * range_bins + next_obs_range_bin_index) + 0]; + const std::uint64_t next_obs_range_and_y = + obstacle_points_tensor[6 * (angle_bin_index * range_bins + next_obs_range_bin_index) + 1]; + const std::uint64_t next_obs_range_and_px = + obstacle_points_tensor[6 * (angle_bin_index * range_bins + next_obs_range_bin_index) + 4]; + const std::uint64_t next_obs_range_and_py = + obstacle_points_tensor[6 * (angle_bin_index * range_bins + next_obs_range_bin_index) + 5]; + const std::uint32_t next_obs_range_int = next_obs_range_and_x >> 32; + const float next_obs_world_x = __uint_as_float(next_obs_range_and_x & 0xFFFFFFFF); + const float next_obs_world_y = __uint_as_float(next_obs_range_and_y & 0xFFFFFFFF); + const float next_obs_world_px = __uint_as_float(next_obs_range_and_px & 0xFFFFFFFF); + const float next_obs_world_py = __uint_as_float(next_obs_range_and_py & 0xFFFFFFFF); + + float next_obs_range = + next_obs_range_int * + RANGE_DISCRETIZATION_RESOLUTION; // TODO(knzo25): replace all RANGE_DISCRETIZATION_RESOLUTION + // (1mm) instances with a constant or parameter. handle the + // unlikely overflow + float obs_to_obs_distance = next_obs_range - obs_range; + + const std::uint64_t next_raw_range_and_x = + raw_points_tensor[6 * (angle_bin_index * range_bins + next_raw_range_bin_index) + 0]; + const std::uint64_t next_raw_range_and_y = + raw_points_tensor[6 * (angle_bin_index * range_bins + next_raw_range_bin_index) + 1]; + const std::uint64_t next_raw_range_and_px = + raw_points_tensor[6 * (angle_bin_index * range_bins + next_raw_range_bin_index) + 4]; + const std::uint64_t next_raw_range_and_py = + raw_points_tensor[6 * (angle_bin_index * range_bins + next_raw_range_bin_index) + 5]; + const std::uint32_t next_raw_range_int = next_raw_range_and_x >> 32; + const float next_raw_world_x = __uint_as_float(next_raw_range_and_x & 0xFFFFFFFF); + const float next_raw_world_y = __uint_as_float(next_raw_range_and_y & 0xFFFFFFFF); + const float next_raw_world_px = __uint_as_float(next_raw_range_and_px & 0xFFFFFFFF); + const float next_raw_world_py = __uint_as_float(next_raw_range_and_py & 0xFFFFFFFF); + + if (next_raw_range_int == 0xFFFFFFFF) { + autoware::occupancy_grid_map::utils::raytrace( + obs_world_x, obs_world_y, obs_world_px, obs_world_py, map_origin_x, map_origin_y, + map_resolution_inv, num_cells_x, num_cells_y, no_information_value, costmap_tensor); + return; + } + + if (next_obs_range_int == 0xFFFFFFFF) { + autoware::occupancy_grid_map::utils::raytrace( + obs_world_x, obs_world_y, obs_world_px, obs_world_py, map_origin_x, map_origin_y, + map_resolution_inv, num_cells_x, num_cells_y, no_information_value, costmap_tensor); + return; + } + + float next_raw_range = next_raw_range_int * RANGE_DISCRETIZATION_RESOLUTION; + float raw_to_obs_distance = abs(next_raw_range - obs_range); + + if (obs_to_obs_distance <= obstacle_separation_threshold) { + return; + } + + if (raw_to_obs_distance < obs_to_obs_distance) { + if (next_raw_world_x < map_origin_x || next_raw_world_y < map_origin_y) { + return; + } + + autoware::occupancy_grid_map::utils::raytrace( + obs_world_x, obs_world_y, next_raw_world_x, next_raw_world_y, map_origin_x, map_origin_y, + map_resolution_inv, num_cells_x, num_cells_y, no_information_value, costmap_tensor); + + autoware::occupancy_grid_map::utils::setCellValue( + next_raw_world_x, next_raw_world_y, map_origin_x, map_origin_y, map_resolution_inv, + num_cells_x, num_cells_y, free_space_value, costmap_tensor); + return; + } else { + // fill with no information between obstacles + + if (next_obs_world_x < map_origin_x || next_obs_world_y < map_origin_y) { + return; + } + + autoware::occupancy_grid_map::utils::raytrace( + obs_world_x, obs_world_y, next_obs_world_x, next_obs_world_y, map_origin_x, map_origin_y, + map_resolution_inv, num_cells_x, num_cells_y, no_information_value, costmap_tensor); + + return; + } +} + +__global__ void fillObstaclesKernel( + const std::uint64_t * obstacle_points_tensor, const float distance_margin, + const std::size_t angle_bins, const std::size_t range_bins, const float map_resolution_inv, + const float map_origin_x, const float map_origin_y, const int num_cells_x, const int num_cells_y, + std::uint8_t obstacle_value, std::uint8_t * costmap_tensor) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= angle_bins * range_bins) return; + + int angle_bin_index = idx / range_bins; + int range_bin_index = idx % range_bins; + + std::uint64_t range_and_x = + obstacle_points_tensor[6 * (angle_bin_index * range_bins + range_bin_index) + 0]; + std::uint64_t range_and_y = + obstacle_points_tensor[6 * (angle_bin_index * range_bins + range_bin_index) + 1]; + + std::uint32_t range_int = range_and_x >> 32; + float range = range_int * RANGE_DISCRETIZATION_RESOLUTION; + float world_x = __uint_as_float(range_and_x & 0xFFFFFFFF); + float world_y = __uint_as_float(range_and_y & 0xFFFFFFFF); + + if (range < 0.0 || range_int == 0xFFFFFFFF) { + return; + } + + autoware::occupancy_grid_map::utils::setCellValue( + world_x, world_y, map_origin_x, map_origin_y, map_resolution_inv, num_cells_x, num_cells_y, + obstacle_value, costmap_tensor); + + // Look for the next obstacle point + int next_obs_range_bin_index = range_bin_index + 1; + for (; next_obs_range_bin_index < range_bins; next_obs_range_bin_index++) { + std::uint64_t next_obs_range_and_x = + obstacle_points_tensor[6 * (angle_bin_index * range_bins + next_obs_range_bin_index) + 0]; + std::uint32_t next_obs_range_int = next_obs_range_and_x >> 32; + + if (next_obs_range_int != 0xFFFFFFFF) { + break; + } + } + + std::uint64_t next_obs_range_and_x = + obstacle_points_tensor[6 * (angle_bin_index * range_bins + range_bin_index) + 0]; + std::uint64_t next_obs_range_and_y = + obstacle_points_tensor[6 * (angle_bin_index * range_bins + range_bin_index) + 1]; + + std::uint32_t next_obs_range_int = next_obs_range_and_x >> 32; + float next_obs_range = next_obs_range_int * RANGE_DISCRETIZATION_RESOLUTION; + float next_obs_world_x = __uint_as_float(next_obs_range_and_x & 0xFFFFFFFF); + float next_obs_world_y = __uint_as_float(next_obs_range_and_y & 0xFFFFFFFF); + + if (next_obs_range_int == 0xFFFFFFFF || abs(next_obs_range - range) > distance_margin) { + return; + } + + if (next_obs_world_x < map_origin_x || next_obs_world_y < map_origin_y) { + return; + } + + autoware::occupancy_grid_map::utils::raytrace( + world_x, world_y, next_obs_world_x, next_obs_world_y, map_origin_x, map_origin_y, + map_resolution_inv, num_cells_x, num_cells_y, obstacle_value, costmap_tensor); +} + +void prepareRawTensorLaunch( + const float * input_pointcloud, const std::size_t num_points, const std::size_t points_step, + const std::size_t angle_bins, const std::size_t range_bins, const float min_height, + const float max_height, const float min_angle, const float angle_increment_inv, + const float range_resolution_inv, const Eigen::Matrix3f * rotation_map, + const Eigen::Vector3f * translation_map, const Eigen::Matrix3f * rotation_scan, + const Eigen::Vector3f * translation_scan, std::uint64_t * points_tensor, cudaStream_t stream) +{ + const int threadsPerBlock = 256; + const int blocksPerGrid = (num_points + threadsPerBlock - 1) / threadsPerBlock; + + prepareRawTensorKernel<<>>( + input_pointcloud, num_points, points_step, angle_bins, range_bins, min_height, max_height, + min_angle, angle_increment_inv, range_resolution_inv, rotation_map, translation_map, + rotation_scan, translation_scan, points_tensor); +} + +void prepareObstacleTensorLaunch( + const float * input_pointcloud, const std::size_t num_points, const std::size_t points_step, + const std::size_t angle_bins, const std::size_t range_bins, const float min_height, + const float max_height, const float min_angle, const float angle_increment_inv, + const float range_resolution_inv, const float projection_dz_threshold, + const Eigen::Vector3f * translation_scan_origin, const Eigen::Matrix3f * rotation_map, + const Eigen::Vector3f * translation_map, const Eigen::Matrix3f * rotation_scan, + const Eigen::Vector3f * translation_scan, std::uint64_t * points_tensor, cudaStream_t stream) +{ + const int threadsPerBlock = 256; + const int blocksPerGrid = (num_points + threadsPerBlock - 1) / threadsPerBlock; + + prepareObstacleTensorKernel<<>>( + input_pointcloud, num_points, points_step, angle_bins, range_bins, min_height, max_height, + min_angle, angle_increment_inv, range_resolution_inv, projection_dz_threshold, + translation_scan_origin, rotation_map, translation_map, rotation_scan, translation_scan, + points_tensor); +} + +void fillEmptySpaceLaunch( + const std::uint64_t * points_tensor, const std::size_t angle_bins, const std::size_t range_bins, + const float map_resolution_inv, const float scan_origin_x, const float scan_origin_y, + const float map_origin_x, const float map_origin_y, const int num_cells_x, const int num_cells_y, + std::uint8_t empty_value, std::uint8_t * costmap_tensor, cudaStream_t stream) +{ + const int threadsPerBlock = 256; + const int blocksPerGrid = (angle_bins * range_bins + threadsPerBlock - 1) / threadsPerBlock; + + fillEmptySpaceKernel<<>>( + points_tensor, angle_bins, range_bins, map_resolution_inv, scan_origin_x, scan_origin_y, + map_origin_x, map_origin_y, num_cells_x, num_cells_y, empty_value, costmap_tensor); +} + +void fillUnknownSpaceLaunch( + const std::uint64_t * raw_points_tensor, const std::uint64_t * obstacle_points_tensor, + const float obstacle_separation_threshold, const std::size_t angle_bins, + const std::size_t range_bins, const float map_resolution_inv, const float scan_origin_x, + const float scan_origin_y, const float scan_origin_z, const float map_origin_x, + const float map_origin_y, const float robot_pose_z, const int num_cells_x, const int num_cells_y, + std::uint8_t free_space_value, std::uint8_t no_information_value, std::uint8_t * costmap_tensor, + cudaStream_t stream) +{ + const int threadsPerBlock = 256; + const int blocksPerGrid = (angle_bins * range_bins + threadsPerBlock - 1) / threadsPerBlock; + + fillUnknownSpaceKernel<<>>( + raw_points_tensor, obstacle_points_tensor, obstacle_separation_threshold, angle_bins, + range_bins, map_resolution_inv, scan_origin_x, scan_origin_y, scan_origin_z, map_origin_x, + map_origin_y, robot_pose_z, num_cells_x, num_cells_y, free_space_value, no_information_value, + costmap_tensor); +} + +void fillObstaclesLaunch( + const std::uint64_t * obstacle_points_tensor, const float distance_margin, + const std::size_t angle_bins, const std::size_t range_bins, const float map_resolution_inv, + const float map_origin_x, const float map_origin_y, const int num_cells_x, const int num_cells_y, + std::uint8_t obstacle_value, std::uint8_t * costmap_tensor, cudaStream_t stream) +{ + const int threadsPerBlock = 256; + const int blocksPerGrid = (angle_bins * range_bins + threadsPerBlock - 1) / threadsPerBlock; + + fillObstaclesKernel<<>>( + obstacle_points_tensor, distance_margin, angle_bins, range_bins, map_resolution_inv, + map_origin_x, map_origin_y, num_cells_x, num_cells_y, obstacle_value, costmap_tensor); +} + +} // namespace costmap_2d::map_projective +} // namespace autoware::occupancy_grid_map diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater.cpp b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater.cpp index 771eaabf4f6f9..cdcb0677b7243 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater.cpp @@ -1,4 +1,4 @@ -// Copyright 2021 Tier IV, Inc. +// Copyright 2024 Tier IV, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ #include "autoware/probabilistic_occupancy_grid_map/updater/binary_bayes_filter_updater.hpp" #include "autoware/probabilistic_occupancy_grid_map/cost_value/cost_value.hpp" +#include "autoware/probabilistic_occupancy_grid_map/updater/binary_bayes_filter_updater_kernel.hpp" #include #include @@ -25,8 +26,9 @@ namespace costmap_2d { OccupancyGridMapBBFUpdater::OccupancyGridMapBBFUpdater( - const unsigned int cells_size_x, const unsigned int cells_size_y, const float resolution) -: OccupancyGridMapUpdaterInterface(cells_size_x, cells_size_y, resolution) + const bool use_cuda, const unsigned int cells_size_x, const unsigned int cells_size_y, + const float resolution) +: OccupancyGridMapUpdaterInterface(use_cuda, cells_size_x, cells_size_y, resolution) { } @@ -41,6 +43,22 @@ void OccupancyGridMapBBFUpdater::initRosParam(rclcpp::Node & node) probability_matrix_(Index::OCCUPIED, Index::FREE) = node.declare_parameter("probability_matrix.free_to_occupied"); v_ratio_ = node.declare_parameter("v_ratio"); + + device_probability_matrix_ = + autoware::cuda_utils::make_unique(Index::NUM_STATES * Index::NUM_STATES); + + std::vector probability_matrix_vector; + probability_matrix_vector.resize(Index::NUM_STATES * Index::NUM_STATES); + + for (size_t j = 0; j < Index::NUM_STATES; j++) { + for (size_t i = 0; i < Index::NUM_STATES; i++) { + probability_matrix_vector[j * Index::NUM_STATES + i] = probability_matrix_(j, i); + } + } + + cudaMemcpy( + device_probability_matrix_.get(), probability_matrix_vector.data(), + sizeof(float) * Index::NUM_STATES * Index::NUM_STATES, cudaMemcpyHostToDevice); } inline unsigned char OccupancyGridMapBBFUpdater::applyBBF( @@ -69,16 +87,37 @@ inline unsigned char OccupancyGridMapBBFUpdater::applyBBF( static_cast(254)); } -bool OccupancyGridMapBBFUpdater::update(const Costmap2D & single_frame_occupancy_grid_map) +bool OccupancyGridMapBBFUpdater::update( + const OccupancyGridMapInterface & single_frame_occupancy_grid_map) { updateOrigin( single_frame_occupancy_grid_map.getOriginX(), single_frame_occupancy_grid_map.getOriginY()); - for (unsigned int x = 0; x < getSizeInCellsX(); x++) { - for (unsigned int y = 0; y < getSizeInCellsY(); y++) { - unsigned int index = getIndex(x, y); - costmap_[index] = applyBBF(single_frame_occupancy_grid_map.getCost(x, y), costmap_[index]); + + if (use_cuda_ != single_frame_occupancy_grid_map.isCudaEnabled()) { + throw std::runtime_error("The CUDA setting of the updater and the map do not match."); + } + + if (use_cuda_) { + applyBBFLaunch( + single_frame_occupancy_grid_map.getDeviceCostmap().get(), device_probability_matrix_.get(), + Index::NUM_STATES, Index::FREE, Index::OCCUPIED, cost_value::FREE_SPACE, + cost_value::LETHAL_OBSTACLE, cost_value::NO_INFORMATION, v_ratio_, + getSizeInCellsX() * getSizeInCellsY(), device_costmap_.get(), stream_); + + cudaMemcpy( + costmap_, device_costmap_.get(), getSizeInCellsX() * getSizeInCellsY() * sizeof(std::uint8_t), + cudaMemcpyDeviceToHost); + + cudaStreamSynchronize(stream_); + } else { + for (unsigned int x = 0; x < getSizeInCellsX(); x++) { + for (unsigned int y = 0; y < getSizeInCellsY(); y++) { + unsigned int index = getIndex(x, y); + costmap_[index] = applyBBF(single_frame_occupancy_grid_map.getCost(x, y), costmap_[index]); + } } } + return true; } diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater_kernel.cu b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater_kernel.cu new file mode 100644 index 0000000000000..ef809372c31c0 --- /dev/null +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater_kernel.cu @@ -0,0 +1,81 @@ +// Copyright 2024 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__probabilistic_occupancy_grid_map__UPDATER__BINARY_BAYES_FILTER_UPDATER_KERNEL_HPP_ +#define AUTOWARE__probabilistic_occupancy_grid_map__UPDATER__BINARY_BAYES_FILTER_UPDATER_KERNEL_HPP_ + +#include "autoware/probabilistic_occupancy_grid_map/updater/binary_bayes_filter_updater_kernel.hpp" + +#include + +__global__ void applyBBFKernel( + const std::uint8_t * z_costmap, const float * probability_matrix, const int num_states, + const int free_index, const int occupied_index, const std::uint8_t free_space_value, + const std::uint8_t lethal_ostacle_value, const std::uint8_t no_information_value, + const double v_ratio_, const int num_elements, std::uint8_t * o_costmap) +{ + const int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_elements) { + return; + } + + const std::uint8_t z = z_costmap[idx]; + const std::uint8_t o = o_costmap[idx]; + + constexpr float cost2p = 1.f / 255.f; + const float po = o * cost2p; + float pz{}; + float not_pz{}; + float po_hat{}; + if (z == lethal_ostacle_value) { + pz = probability_matrix[occupied_index * num_states + occupied_index]; + not_pz = probability_matrix[free_index * num_states + occupied_index]; + po_hat = ((po * pz) / ((po * pz) + ((1.f - po) * not_pz))); + } else if (z == free_space_value) { + pz = 1.f - probability_matrix[free_index * num_states + free_index]; + not_pz = 1.f - probability_matrix[occupied_index * num_states + free_index]; + po_hat = ((po * pz) / ((po * pz) + ((1.f - po) * not_pz))); + } else if (z == no_information_value) { + const float inv_v_ratio = 1.f / v_ratio_; + po_hat = ((po + (0.5f * inv_v_ratio)) / ((1.f * inv_v_ratio) + 1.f)); + } + + o_costmap[idx] = std::min( + std::max( + static_cast(std::lround(po_hat * 255.f)), static_cast(1)), + static_cast(254)); +} + +namespace autoware::occupancy_grid_map +{ +namespace costmap_2d +{ + +void applyBBFLaunch( + const std::uint8_t * z_costmap, const float * probability_matrix, const int num_states, + const int free_index, const int occupied_index, const std::uint8_t free_space_value, + const std::uint8_t lethal_ostacle_value, const std::uint8_t no_information_value, + const double v_ratio_, const int num_elements, std::uint8_t * o_costmap, cudaStream_t stream) +{ + const int num_threads = 256; + const int num_blocks = (num_elements + num_threads - 1) / num_threads; + applyBBFKernel<<>>( + z_costmap, probability_matrix, num_states, free_index, occupied_index, free_space_value, + lethal_ostacle_value, no_information_value, v_ratio_, num_elements, o_costmap); +} + +} // namespace costmap_2d +} // namespace autoware::occupancy_grid_map + +#endif // AUTOWARE__probabilistic_occupancy_grid_map__UPDATER__BINARY_BAYES_FILTER_UPDATER_KERNEL_HPP_ diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater.cpp b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater.cpp index 9f3c3c7e40eaf..b1a8ff937c495 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 TIER IV, Inc. +// Copyright 2024 TIER IV, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ #include "autoware/probabilistic_occupancy_grid_map/updater/log_odds_bayes_filter_updater.hpp" #include "autoware/probabilistic_occupancy_grid_map/cost_value/cost_value.hpp" +#include "autoware/probabilistic_occupancy_grid_map/updater/log_odds_bayes_filter_updater_kernel.hpp" #include @@ -39,7 +40,7 @@ inline unsigned char OccupancyGridMapLOBFUpdater::applyLOBF( constexpr unsigned char unknown = cost_value::NO_INFORMATION; constexpr unsigned char unknown_margin = 1; - /* Tau and ST decides how fast the observation decay to the unknown status*/ + // Tau and ST decides how fast the observation decay to the unknown status constexpr double tau = 0.75; constexpr double sample_time = 0.1; @@ -58,16 +59,35 @@ inline unsigned char OccupancyGridMapLOBFUpdater::applyLOBF( } } -bool OccupancyGridMapLOBFUpdater::update(const Costmap2D & single_frame_occupancy_grid_map) +bool OccupancyGridMapLOBFUpdater::update( + const OccupancyGridMapInterface & single_frame_occupancy_grid_map) { updateOrigin( single_frame_occupancy_grid_map.getOriginX(), single_frame_occupancy_grid_map.getOriginY()); - for (unsigned int x = 0; x < getSizeInCellsX(); x++) { - for (unsigned int y = 0; y < getSizeInCellsY(); y++) { - unsigned int index = getIndex(x, y); - costmap_[index] = applyLOBF(single_frame_occupancy_grid_map.getCost(x, y), costmap_[index]); + + if (use_cuda_ != single_frame_occupancy_grid_map.isCudaEnabled()) { + throw std::runtime_error("The CUDA setting of the updater and the map do not match."); + } + + if (use_cuda_) { + applyLOBFLaunch( + single_frame_occupancy_grid_map.getDeviceCostmap().get(), cost_value::NO_INFORMATION, + getSizeInCellsX() * getSizeInCellsY(), device_costmap_.get(), stream_); + + cudaMemcpy( + costmap_, device_costmap_.get(), getSizeInCellsX() * getSizeInCellsY() * sizeof(std::uint8_t), + cudaMemcpyDeviceToHost); + + cudaStreamSynchronize(stream_); + } else { + for (unsigned int x = 0; x < getSizeInCellsX(); x++) { + for (unsigned int y = 0; y < getSizeInCellsY(); y++) { + unsigned int index = getIndex(x, y); + costmap_[index] = applyLOBF(single_frame_occupancy_grid_map.getCost(x, y), costmap_[index]); + } } } + return true; } diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu new file mode 100644 index 0000000000000..068bc41d0c8d2 --- /dev/null +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu @@ -0,0 +1,93 @@ +// Copyright 2024 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__probabilistic_occupancy_grid_map__UPDATER__log_odds_bayes_filter_updater_kernel_HPP_ +#define AUTOWARE__probabilistic_occupancy_grid_map__UPDATER__log_odds_bayes_filter_updater_kernel_HPP_ + +#include "autoware/probabilistic_occupancy_grid_map/updater/log_odds_bayes_filter_updater_kernel.hpp" + +#include + +#define EPSILON_PROB 0.03 + +namespace autoware::occupancy_grid_map +{ +namespace costmap_2d +{ + +__host__ __device__ __forceinline__ double convertCharToProbability(const std::uint8_t value) +{ + return static_cast(value) / 255.0; +} + +__host__ __device__ __forceinline__ std::uint8_t convertProbabilityToChar(const double value) +{ + return static_cast(value * 255.0); +} + +__host__ __device__ __forceinline__ double logOddsFusion(const double p1, const double p2) +{ + double log_odds = 0.0; + + const double p1_norm = std::max(EPSILON_PROB, std::min(1.0 - EPSILON_PROB, p1)); + log_odds += std::log(p1_norm / (1.0 - p1_norm)); + + const double p2_norm = std::max(EPSILON_PROB, std::min(1.0 - EPSILON_PROB, p1)); + log_odds += std::log(p2_norm / (1.0 - p2_norm)); + + return 1.0 / (1.0 + std::exp(-log_odds)); +} + +__global__ void applyLOBFKernel( + const std::uint8_t * z_costmap, const std::uint8_t unknown_value, const int num_elements, + std::uint8_t * o_costmap) +{ + const int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_elements) { + return; + } + + const std::uint8_t z = z_costmap[idx]; + const std::uint8_t o = o_costmap[idx]; + + const std::uint8_t unknown_margin = 1; + const double tau = 0.75; + const double sample_time = 0.1; + + if (z >= unknown_value - unknown_margin && z <= unknown_value + unknown_margin) { + const int diff = static_cast(o) - static_cast(unknown_value); + const double decay = std::exp(-sample_time / tau); + const double fused = static_cast(unknown_value) + static_cast(diff) * decay; + o_costmap[idx] = static_cast(fused); + } else { + const unsigned char fused = convertProbabilityToChar( + logOddsFusion(convertCharToProbability(z), convertCharToProbability(o))); + o_costmap[idx] = static_cast(fused); + } +} + +void applyLOBFLaunch( + const std::uint8_t * z_costmap, const std::uint8_t no_information_value, const int num_elements, + std::uint8_t * o_costmap, cudaStream_t stream) +{ + const int num_threads = 256; + const int num_blocks = (num_elements + num_threads - 1) / num_threads; + applyLOBFKernel<<>>( + z_costmap, no_information_value, num_elements, o_costmap); +} + +} // namespace costmap_2d +} // namespace autoware::occupancy_grid_map + +#endif // AUTOWARE__probabilistic_occupancy_grid_map__UPDATER__log_odds_bayes_filter_updater_kernel_HPP_ diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils.cpp b/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils.cpp index 9d190664b8858..c57bd0e081af7 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Tier IV, Inc. +// Copyright 2024 Tier IV, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -47,17 +47,28 @@ bool transformPointcloud( return true; } -// used in pointcloud based occupancy grid map -void transformPointcloud( - const sensor_msgs::msg::PointCloud2 & input, const geometry_msgs::msg::Pose & pose, - sensor_msgs::msg::PointCloud2 & output) +bool transformPointcloud( + CudaPointCloud2 & input, const tf2_ros::Buffer & tf2, const std::string & target_frame) { - const auto transform = autoware::universe_utils::pose2transform(pose); - Eigen::Matrix4f tf_matrix = tf2::transformToEigen(transform).matrix().cast(); - - pcl_ros::transformPointCloud(tf_matrix, input, output); - output.header.stamp = input.header.stamp; - output.header.frame_id = ""; + geometry_msgs::msg::TransformStamped tf_stamped; + // lookup transform + try { + tf_stamped = tf2.lookupTransform( + target_frame, input.header.frame_id, input.header.stamp, rclcpp::Duration::from_seconds(0.5)); + } catch (tf2::TransformException & ex) { + RCLCPP_WARN( + rclcpp::get_logger("probabilistic_occupancy_grid_map"), "Failed to lookup transform: %s", + ex.what()); + return false; + } + // transform pointcloud + Eigen::Matrix4f tf_matrix = tf2::transformToEigen(tf_stamped.transform).matrix().cast(); + Eigen::Matrix3f rotation = tf_matrix.block<3, 3>(0, 0); + Eigen::Vector3f translation = tf_matrix.block<3, 1>(0, 3); + transformPointCloudLaunch( + input.data.get(), input.width * input.height, input.point_step, rotation, translation); + input.header.frame_id = target_frame; + return true; } Eigen::Matrix4f getTransformMatrix(const geometry_msgs::msg::Pose & pose) @@ -145,51 +156,5 @@ geometry_msgs::msg::Pose getInversePose(const geometry_msgs::msg::Pose & pose) return inv_pose; } -/** - * @brief extract Common Pointcloud between obstacle pc and raw pc - * @param obstacle_pc - * @param raw_pc - * @param output_obstacle_pc - */ -bool extractCommonPointCloud( - const sensor_msgs::msg::PointCloud2 & obstacle_pc, const sensor_msgs::msg::PointCloud2 & raw_pc, - sensor_msgs::msg::PointCloud2 & output_obstacle_pc) -{ - // Convert to vector of 3d points - std::vector v_obstacle_pc, v_raw_pc, v_output_obstacle_pc; - for (sensor_msgs::PointCloud2ConstIterator iter_x(obstacle_pc, "x"), - iter_y(obstacle_pc, "y"), iter_z(obstacle_pc, "z"); - iter_x != iter_x.end(); ++iter_x, ++iter_y, ++iter_z) { - v_obstacle_pc.push_back(MyPoint3d(*iter_x, *iter_y, *iter_z)); - } - for (sensor_msgs::PointCloud2ConstIterator iter_x(raw_pc, "x"), iter_y(raw_pc, "y"), - iter_z(raw_pc, "z"); - iter_x != iter_x.end(); ++iter_x, ++iter_y, ++iter_z) { - v_raw_pc.push_back(MyPoint3d(*iter_x, *iter_y, *iter_z)); - } - - // sort pointclouds for searching cross points: O(nlogn) - std::sort(v_obstacle_pc.begin(), v_obstacle_pc.end(), [](auto a, auto b) { return a < b; }); - std::sort(v_raw_pc.begin(), v_raw_pc.end(), [](auto a, auto b) { return a < b; }); - - // calc intersection points of two pointclouds: O(n) - std::set_intersection( - v_obstacle_pc.begin(), v_obstacle_pc.end(), v_raw_pc.begin(), v_raw_pc.end(), - std::back_inserter(v_output_obstacle_pc)); - if (v_output_obstacle_pc.size() == 0) { - return false; - } - - // Convert to ros msg - pcl::PointCloud::Ptr pcl_output(new pcl::PointCloud); - for (auto p : v_output_obstacle_pc) { - pcl_output->push_back(pcl::PointXYZ(p.x, p.y, p.z)); - } - pcl::toROSMsg(*pcl_output, output_obstacle_pc); - output_obstacle_pc.header = obstacle_pc.header; - - return true; -} - } // namespace utils } // namespace autoware::occupancy_grid_map diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils_kernel.cu b/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils_kernel.cu new file mode 100644 index 0000000000000..da2ee86a3d84f --- /dev/null +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils_kernel.cu @@ -0,0 +1,295 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +/********************************************************************* + * + * Software License Agreement (BSD License) + * + * Copyright (c) 2008, 2013, Willow Garage, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of Willow Garage, Inc. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Eitan Marder-Eppstein + * David V. Lu!! + *********************************************************************/ + +#include "autoware/probabilistic_occupancy_grid_map/utils/utils_kernel.hpp" + +namespace autoware::occupancy_grid_map +{ +namespace utils +{ + +inline __device__ bool worldToMap( + float wx, float wy, unsigned int & mx, unsigned int & my, float origin_x, float origin_y, + float resolution_inv, int size_x, int size_y) +{ + if (wx < origin_x || wy < origin_y) { + return false; + } + + mx = static_cast(std::floor((wx - origin_x) * resolution_inv)); + my = static_cast(std::floor((wy - origin_y) * resolution_inv)); + + if (mx < size_x && my < size_y) { + return true; + } + + return false; +} + +__device__ void setCellValue( + float wx, float wy, float origin_x, float origin_y, float resolution_inv, int size_x, int size_y, + std::uint8_t value, std::uint8_t * costmap_tensor) +{ + unsigned int mx, my; + if (!worldToMap(wx, wy, mx, my, origin_x, origin_y, resolution_inv, size_x, size_y)) { + return; + } + + costmap_tensor[my * size_x + mx] = value; +} + +inline __device__ void bresenham2D( + unsigned int abs_da, unsigned int abs_db, int error_b, int offset_a, int offset_b, + unsigned int offset, unsigned int max_length, std::uint8_t cost, std::uint8_t * costmap_tensor) +{ + unsigned int end = min(max_length, abs_da); + for (unsigned int i = 0; i < end; ++i) { + costmap_tensor[offset] = cost; + offset += offset_a; + error_b += abs_db; + if ((unsigned int)error_b >= abs_da) { + offset += offset_b; + error_b -= abs_da; + } + } + costmap_tensor[offset] = cost; +} + +/** + * @brief Raytrace a line and apply some action at each step + * @param at The action to take... a functor + * @param x0 The starting x coordinate + * @param y0 The starting y coordinate + * @param x1 The ending x coordinate + * @param y1 The ending y coordinate + * @param max_length The maximum desired length of the segment... + * allows you to not go all the way to the endpoint + * @param min_length The minimum desired length of the segment + */ +inline __device__ void raytraceLine( + unsigned int x0, unsigned int y0, unsigned int x1, unsigned int y1, unsigned int max_length, + unsigned int min_length, unsigned int size_x, std::uint8_t cost, std::uint8_t * costmap_tensor) +{ + int dx_full = x1 - x0; + int dy_full = y1 - y0; + + // we need to chose how much to scale our dominant dimension, + // based on the maximum length of the line + float dist = sqrt((float)(dx_full * dx_full + dy_full * dy_full)); // hypot(dx_full, dy_full); + if (dist < min_length) { + return; + } + + unsigned int min_x0, min_y0; + if (dist > 0.0) { + // Adjust starting point and offset to start from min_length distance + min_x0 = (unsigned int)(x0 + dx_full / dist * min_length); + min_y0 = (unsigned int)(y0 + dy_full / dist * min_length); + } else { + // dist can be 0 if [x0, y0]==[x1, y1]. + // In this case only this cell should be processed. + min_x0 = x0; + min_y0 = y0; + } + unsigned int offset = min_y0 * size_x + min_x0; + + int dx = x1 - min_x0; + int dy = y1 - min_y0; + + unsigned int abs_dx = abs(dx); + unsigned int abs_dy = abs(dy); + + int offset_dx = dx > 0 ? 1 : -1; // sign(dx); + int offset_dy = dy > 0 ? size_x : -size_x; // sign(dy) * size_x; + + float scale = (dist == 0.0) ? 1.0 : min(1.f, max_length / dist); + // if x is dominant + if (abs_dx >= abs_dy) { + int error_y = abs_dx / 2; + + bresenham2D( + abs_dx, abs_dy, error_y, offset_dx, offset_dy, offset, (unsigned int)(scale * abs_dx), cost, + costmap_tensor); + return; + } + + // otherwise y is dominant + int error_x = abs_dy / 2; + + bresenham2D( + abs_dy, abs_dx, error_x, offset_dy, offset_dx, offset, (unsigned int)(scale * abs_dy), cost, + costmap_tensor); +} + +void __device__ raytrace( + const float source_x, const float source_y, const float target_x, const float target_y, + const float origin_x, float origin_y, const float resolution_inv, const int size_x, + const int size_y, const std::uint8_t cost, std::uint8_t * costmap_tensor) +{ + unsigned int x0{}; + unsigned int y0{}; + const float ox{source_x}; + const float oy{source_y}; + + if (!worldToMap(ox, oy, x0, y0, origin_x, origin_y, resolution_inv, size_x, size_y)) { + return; + } + + // we can pre-compute the endpoints of the map outside of the inner loop... we'll need these later + const float resolution = 1.0 / resolution_inv; + const float map_end_x = origin_x + size_x * resolution; + const float map_end_y = origin_y + size_y * resolution; + + float wx = target_x; + float wy = target_y; + + // now we also need to make sure that the endpoint we're ray-tracing + // to isn't off the costmap and scale if necessary + const float a = wx - ox; + const float b = wy - oy; + + // the minimum value to raytrace from is the origin + if (wx < origin_x) { + const float t = (origin_x - ox) / a; + wx = origin_x; + wy = oy + b * t; + } + if (wy < origin_y) { + const float t = (origin_y - oy) / b; + wx = ox + a * t; + wy = origin_y; + } + + // the maximum value to raytrace to is the end of the map + if (wx > map_end_x) { + const float t = (map_end_x - ox) / a; + wx = map_end_x - .001; + wy = oy + b * t; + } + if (wy > map_end_y) { + const float t = (map_end_y - oy) / b; + wx = ox + a * t; + wy = map_end_y - .001; + } + + // now that the vector is scaled correctly... we'll get the map coordinates of its endpoint + unsigned int x1{}; + unsigned int y1{}; + + // check for legality just in case + if (!worldToMap(wx, wy, x1, y1, origin_x, origin_y, resolution_inv, size_x, size_y)) { + return; + } + + constexpr unsigned int cell_raytrace_range = 10000; // large number to ignore range threshold + raytraceLine(x0, y0, x1, y1, cell_raytrace_range, 0, size_x, cost, costmap_tensor); +} + +__global__ void transformPointCloudKernel( + std::uint8_t * points, std::size_t num_points, std::size_t points_step, + const Eigen::Matrix3f & rotation, const Eigen::Vector3f & translation) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_points) { + return; + } + + Eigen::Map point_map(reinterpret_cast(points + idx * points_step)); + point_map = rotation * point_map + translation; +} + +__global__ void copyMapRegionKernel( + const std::uint8_t * source_map, unsigned int sm_lower_left_x, unsigned int sm_lower_left_y, + unsigned int sm_size_x, std::uint8_t * dest_map, unsigned int dm_lower_left_x, + unsigned int dm_lower_left_y, unsigned int dm_size_x, unsigned int region_size_x, + unsigned int region_size_y) +{ + const int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= region_size_x * region_size_y) return; + + const int region_y = idx / region_size_x; + const int region_x = idx % region_size_x; + + const int sm_index = + sm_lower_left_y * sm_size_x + sm_lower_left_x + region_y * sm_size_x + region_x; + const int dm_index = + dm_lower_left_y * dm_size_x + dm_lower_left_x + region_y * dm_size_x + region_x; + + dest_map[dm_index] = source_map[sm_index]; +} + +void transformPointCloudLaunch( + std::uint8_t * points, std::size_t num_points, std::size_t points_step, + const Eigen::Matrix3f & rotation, const Eigen::Vector3f & translation) +{ + // Launch kernel + int block_size = 256; + int num_blocks = (num_points + block_size - 1) / block_size; + transformPointCloudKernel<<>>( + points, num_points, points_step, rotation, translation); +} + +void copyMapRegionLaunch( + const std::uint8_t * source_map, unsigned int sm_lower_left_x, unsigned int sm_lower_left_y, + unsigned int sm_size_x, std::uint8_t * dest_map, unsigned int dm_lower_left_x, + unsigned int dm_lower_left_y, unsigned int dm_size_x, unsigned int region_size_x, + unsigned int region_size_y, cudaStream_t stream) +{ + const int threadsPerBlock = 256; + const int blocksPerGrid = (region_size_x * region_size_y + threadsPerBlock - 1) / threadsPerBlock; + + copyMapRegionKernel<<>>( + source_map, sm_lower_left_x, sm_lower_left_y, sm_size_x, dest_map, dm_lower_left_x, + dm_lower_left_y, dm_size_x, region_size_x, region_size_y); +} + +} // namespace utils +} // namespace autoware::occupancy_grid_map diff --git a/perception/autoware_probabilistic_occupancy_grid_map/package.xml b/perception/autoware_probabilistic_occupancy_grid_map/package.xml index ece7ef0dec663..8e956b7570db4 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/package.xml +++ b/perception/autoware_probabilistic_occupancy_grid_map/package.xml @@ -15,6 +15,7 @@ autoware_cmake eigen3_cmake_module + autoware_cuda_utils autoware_universe_utils grid_map_costmap_2d grid_map_msgs diff --git a/perception/autoware_probabilistic_occupancy_grid_map/src/fusion/synchronized_grid_map_fusion_node.cpp b/perception/autoware_probabilistic_occupancy_grid_map/src/fusion/synchronized_grid_map_fusion_node.cpp index 880dfda03c495..31cc1aa18c1b5 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/src/fusion/synchronized_grid_map_fusion_node.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/src/fusion/synchronized_grid_map_fusion_node.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 TIER IV, Inc. +// Copyright 2024 TIER IV, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -115,8 +115,8 @@ GridMapFusionNode::GridMapFusionNode(const rclcpp::NodeOptions & node_options) // updater occupancy_grid_map_updater_ptr_ = std::make_shared( - fusion_map_length_x_ / fusion_map_resolution_, fusion_map_length_y_ / fusion_map_resolution_, - fusion_map_resolution_); + false, fusion_map_length_x_ / fusion_map_resolution_, + fusion_map_length_y_ / fusion_map_resolution_, fusion_map_resolution_); // Set timer const auto period_ns = std::chrono::duration_cast( @@ -309,7 +309,7 @@ void GridMapFusionNode::publish() // fusion map // single frame gridmap fusion - auto fused_map = SingleFrameOccupancyFusion(subscribed_maps, latest_stamp, weights); + auto & fused_map = SingleFrameOccupancyFusion(subscribed_maps, latest_stamp, weights); // multi frame gridmap fusion occupancy_grid_map_updater_ptr_->update(fused_map); @@ -341,7 +341,7 @@ void GridMapFusionNode::publish() } } -OccupancyGridMapFixedBlindSpot GridMapFusionNode::SingleFrameOccupancyFusion( +const OccupancyGridMapFixedBlindSpot & GridMapFusionNode::SingleFrameOccupancyFusion( std::vector & occupancy_grid_maps, const builtin_interfaces::msg::Time latest_stamp, const std::vector & weights) { @@ -368,21 +368,29 @@ OccupancyGridMapFixedBlindSpot GridMapFusionNode::SingleFrameOccupancyFusion( inner_st_ptr = std::make_unique("create_fused_map", *time_keeper_); // init fused map with calculated origin - OccupancyGridMapFixedBlindSpot fused_map( - occupancy_grid_maps[0].getSizeInCellsX(), occupancy_grid_maps[0].getSizeInCellsY(), - occupancy_grid_maps[0].getResolution()); - fused_map.updateOrigin( - gridmap_origin.position.x - fused_map.getSizeInMetersX() / 2, - gridmap_origin.position.y - fused_map.getSizeInMetersY() / 2); + fused_map_->resetMaps(); + + if ( + !fused_map_ || fused_map_->getSizeInCellsX() != occupancy_grid_maps[0].getSizeInCellsX() || + fused_map_->getSizeInCellsY() != occupancy_grid_maps[0].getSizeInCellsY() || + fused_map_->getResolution() != occupancy_grid_maps[0].getResolution()) { + fused_map_ = std::make_unique( + false, occupancy_grid_maps[0].getSizeInCellsX(), occupancy_grid_maps[0].getSizeInCellsY(), + occupancy_grid_maps[0].getResolution()); + } + + fused_map_->updateOrigin( + gridmap_origin.position.x - fused_map_->getSizeInMetersX() / 2, + gridmap_origin.position.y - fused_map_->getSizeInMetersY() / 2); // fix origin of each map for (auto & map : occupancy_grid_maps) { - map.updateOrigin(fused_map.getOriginX(), fused_map.getOriginY()); + map.updateOrigin(fused_map_->getOriginX(), fused_map_->getOriginY()); } // assume map is same size and resolutions - for (unsigned int x = 0; x < fused_map.getSizeInCellsX(); x++) { - for (unsigned int y = 0; y < fused_map.getSizeInCellsY(); y++) { + for (unsigned int x = 0; x < fused_map_->getSizeInCellsX(); x++) { + for (unsigned int y = 0; y < fused_map_->getSizeInCellsY(); y++) { // get cost of each map std::vector costs; for (auto & map : occupancy_grid_maps) { @@ -393,11 +401,11 @@ OccupancyGridMapFixedBlindSpot GridMapFusionNode::SingleFrameOccupancyFusion( auto fused_cost = fusion_policy::singleFrameOccupancyFusion(costs, fusion_method_, weights); // set max cost to fused map - fused_map.setCost(x, y, fused_cost); + fused_map_->setCost(x, y, fused_cost); } } - return fused_map; + return *fused_map_; } // scope for time keeper ends } @@ -408,7 +416,7 @@ OccupancyGridMapFixedBlindSpot GridMapFusionNode::OccupancyGridMsgToGridMap( if (time_keeper_) st_ptr = std::make_unique(__func__, *time_keeper_); OccupancyGridMapFixedBlindSpot gridmap( - occupancy_grid_map.info.width, occupancy_grid_map.info.height, + false, occupancy_grid_map.info.width, occupancy_grid_map.info.height, occupancy_grid_map.info.resolution); gridmap.updateOrigin( occupancy_grid_map.info.origin.position.x, occupancy_grid_map.info.origin.position.y); diff --git a/perception/autoware_probabilistic_occupancy_grid_map/src/fusion/synchronized_grid_map_fusion_node.hpp b/perception/autoware_probabilistic_occupancy_grid_map/src/fusion/synchronized_grid_map_fusion_node.hpp index 0f9475e476921..0d894d99a68a1 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/src/fusion/synchronized_grid_map_fusion_node.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/src/fusion/synchronized_grid_map_fusion_node.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Tier IV, Inc. +// Copyright 2024 Tier IV, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -69,7 +69,7 @@ class GridMapFusionNode : public rclcpp::Node OccupancyGridMapFixedBlindSpot OccupancyGridMsgToGridMap( const nav_msgs::msg::OccupancyGrid & occupancy_grid_map); - OccupancyGridMapFixedBlindSpot SingleFrameOccupancyFusion( + const OccupancyGridMapFixedBlindSpot & SingleFrameOccupancyFusion( std::vector & occupancy_grid_maps, const builtin_interfaces::msg::Time latest_stamp, const std::vector & weights); @@ -103,6 +103,7 @@ class GridMapFusionNode : public rclcpp::Node // cache for fusion std::mutex mutex_; + std::unique_ptr fused_map_; std::shared_ptr occupancy_grid_map_updater_ptr_; // contains fused grid map std::map diff --git a/perception/autoware_probabilistic_occupancy_grid_map/src/laserscan_based_occupancy_grid_map/laserscan_based_occupancy_grid_map_node.cpp b/perception/autoware_probabilistic_occupancy_grid_map/src/laserscan_based_occupancy_grid_map/laserscan_based_occupancy_grid_map_node.cpp index cb1b22a519e2d..4a8c9626de013 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/src/laserscan_based_occupancy_grid_map/laserscan_based_occupancy_grid_map_node.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/src/laserscan_based_occupancy_grid_map/laserscan_based_occupancy_grid_map_node.cpp @@ -95,14 +95,14 @@ LaserscanBasedOccupancyGridMapNode::LaserscanBasedOccupancyGridMapNode( const std::string updater_type = this->declare_parameter("updater_type"); if (updater_type == "binary_bayes_filter") { occupancy_grid_map_updater_ptr_ = std::make_shared( - map_length / map_resolution, map_width / map_resolution, map_resolution); + false, map_length / map_resolution, map_width / map_resolution, map_resolution); } else { RCLCPP_WARN( get_logger(), "specified occupancy grid map updater type [%s] is not found, use binary_bayes_filter", updater_type.c_str()); occupancy_grid_map_updater_ptr_ = std::make_shared( - map_length / map_resolution, map_width / map_resolution, map_resolution); + false, map_length / map_resolution, map_width / map_resolution, map_resolution); } occupancy_grid_map_updater_ptr_->initRosParam(*this); diff --git a/perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.cpp b/perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.cpp index 4d442ad2c33db..cf169e6005c82 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.cpp @@ -1,4 +1,4 @@ -// Copyright 2021 Tier IV, Inc. +// Copyright 2024 Tier IV, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -85,26 +85,27 @@ PointcloudBasedOccupancyGridMapNode::PointcloudBasedOccupancyGridMapNode( const std::string updater_type = this->declare_parameter("updater_type"); if (updater_type == "binary_bayes_filter") { occupancy_grid_map_updater_ptr_ = std::make_unique( - map_length / map_resolution, map_length / map_resolution, map_resolution); + true, map_length / map_resolution, map_length / map_resolution, map_resolution); } else { RCLCPP_WARN( get_logger(), "specified occupancy grid map updater type [%s] is not found, use binary_bayes_filter", updater_type.c_str()); occupancy_grid_map_updater_ptr_ = std::make_unique( - map_length / map_resolution, map_length / map_resolution, map_resolution); + true, map_length / map_resolution, map_length / map_resolution, map_resolution); } occupancy_grid_map_updater_ptr_->initRosParam(*this); const std::string grid_map_type = this->declare_parameter("grid_map_type"); + if (grid_map_type == "OccupancyGridMapProjectiveBlindSpot") { occupancy_grid_map_ptr_ = std::make_unique( - occupancy_grid_map_updater_ptr_->getSizeInCellsX(), + true, occupancy_grid_map_updater_ptr_->getSizeInCellsX(), occupancy_grid_map_updater_ptr_->getSizeInCellsY(), occupancy_grid_map_updater_ptr_->getResolution()); } else if (grid_map_type == "OccupancyGridMapFixedBlindSpot") { occupancy_grid_map_ptr_ = std::make_unique( - occupancy_grid_map_updater_ptr_->getSizeInCellsX(), + true, occupancy_grid_map_updater_ptr_->getSizeInCellsX(), occupancy_grid_map_updater_ptr_->getSizeInCellsY(), occupancy_grid_map_updater_ptr_->getResolution()); } else { @@ -113,7 +114,7 @@ PointcloudBasedOccupancyGridMapNode::PointcloudBasedOccupancyGridMapNode( "specified occupancy grid map type [%s] is not found, use OccupancyGridMapFixedBlindSpot", grid_map_type.c_str()); occupancy_grid_map_ptr_ = std::make_unique( - occupancy_grid_map_updater_ptr_->getSizeInCellsX(), + true, occupancy_grid_map_updater_ptr_->getSizeInCellsX(), occupancy_grid_map_updater_ptr_->getSizeInCellsY(), occupancy_grid_map_updater_ptr_->getResolution()); } @@ -151,30 +152,27 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw( if (stop_watch_ptr_) { stop_watch_ptr_->toc("processing_time", true); } - // if scan_origin_frame_ is "", replace it with input_raw_msg->header.frame_id + + raw_pointcloud_.fromROSMsg(*input_raw_msg); + obstacle_pointcloud_.fromROSMsg(*input_obstacle_msg); + + // if scan_origin_frame_ is "", replace it with raw_pointcloud_.header.frame_id if (scan_origin_frame_.empty()) { - scan_origin_frame_ = input_raw_msg->header.frame_id; + scan_origin_frame_ = raw_pointcloud_.header.frame_id; } - PointCloud2 trans_input_raw{}, trans_input_obstacle{}; - bool is_raw_transformed = false; - bool is_obstacle_transformed = false; - // Prepare for applying height filter if (use_height_filter_) { // Make sure that the frame is base_link - if (input_raw_msg->header.frame_id != base_link_frame_) { - if (!utils::transformPointcloud(*input_raw_msg, *tf2_, base_link_frame_, trans_input_raw)) { + if (raw_pointcloud_.header.frame_id != base_link_frame_) { + if (!utils::transformPointcloud(raw_pointcloud_, *tf2_, base_link_frame_)) { return; } - is_raw_transformed = true; } if (input_obstacle_msg->header.frame_id != base_link_frame_) { - if (!utils::transformPointcloud( - *input_obstacle_msg, *tf2_, base_link_frame_, trans_input_obstacle)) { + if (!utils::transformPointcloud(obstacle_pointcloud_, *tf2_, base_link_frame_)) { return; } - is_obstacle_transformed = true; } occupancy_grid_map_ptr_->setHeightLimit(min_height_, max_height_); } else { @@ -182,32 +180,16 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw( -std::numeric_limits::infinity(), std::numeric_limits::infinity()); } - const PointCloud2::ConstSharedPtr input_raw_use = - is_raw_transformed ? std::make_shared(trans_input_raw) : input_raw_msg; - const PointCloud2::ConstSharedPtr input_obstacle_use = - is_obstacle_transformed ? std::make_shared(trans_input_obstacle) - : input_obstacle_msg; - - // Filter obstacle pointcloud by raw pointcloud - PointCloud2 input_obstacle_pc_common{}; - bool use_input_obstacle_pc_common = false; - if (filter_obstacle_pointcloud_by_raw_pointcloud_) { - if (utils::extractCommonPointCloud( - *input_obstacle_use, *input_raw_use, input_obstacle_pc_common)) { - use_input_obstacle_pc_common = true; - } - } - // Get from map to sensor frame pose Pose robot_pose{}; Pose gridmap_origin{}; Pose scan_origin{}; try { - robot_pose = utils::getPose(input_raw_msg->header.stamp, *tf2_, base_link_frame_, map_frame_); + robot_pose = utils::getPose(raw_pointcloud_.header.stamp, *tf2_, base_link_frame_, map_frame_); gridmap_origin = - utils::getPose(input_raw_msg->header.stamp, *tf2_, gridmap_origin_frame_, map_frame_); + utils::getPose(raw_pointcloud_.header.stamp, *tf2_, gridmap_origin_frame_, map_frame_); scan_origin = - utils::getPose(input_raw_msg->header.stamp, *tf2_, scan_origin_frame_, map_frame_); + utils::getPose(raw_pointcloud_.header.stamp, *tf2_, scan_origin_frame_, map_frame_); } catch (tf2::TransformException & ex) { RCLCPP_WARN_STREAM(get_logger(), ex.what()); return; @@ -224,9 +206,7 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw( gridmap_origin.position.x - occupancy_grid_map_ptr_->getSizeInMetersX() / 2, gridmap_origin.position.y - occupancy_grid_map_ptr_->getSizeInMetersY() / 2); occupancy_grid_map_ptr_->updateWithPointCloud( - *input_raw_use, - (use_input_obstacle_pc_common ? input_obstacle_pc_common : *input_obstacle_use), robot_pose, - scan_origin); + raw_pointcloud_, obstacle_pointcloud_, robot_pose, scan_origin); } if (enable_single_frame_mode_) { diff --git a/perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.hpp b/perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.hpp index 34afd5edc7c37..c985414ffdf6b 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.hpp @@ -1,4 +1,4 @@ -// Copyright 2021 Tier IV, Inc. +// Copyright 2024 Tier IV, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include "autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_base.hpp" #include "autoware/probabilistic_occupancy_grid_map/updater/binary_bayes_filter_updater.hpp" #include "autoware/probabilistic_occupancy_grid_map/updater/ogm_updater_interface.hpp" +#include "autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp" #include #include @@ -82,6 +83,9 @@ class PointcloudBasedOccupancyGridMapNode : public rclcpp::Node std::unique_ptr occupancy_grid_map_ptr_; std::unique_ptr occupancy_grid_map_updater_ptr_; + CudaPointCloud2 raw_pointcloud_; + CudaPointCloud2 obstacle_pointcloud_; + // ROS Parameters std::string map_frame_; std::string base_link_frame_; From 5b65b6f8095e69719fa3d4890224c6864dc85f29 Mon Sep 17 00:00:00 2001 From: Kenzo Lobos-Tsunekawa Date: Tue, 3 Dec 2024 13:23:59 +0900 Subject: [PATCH 02/19] chore: fixed cspells Signed-off-by: Kenzo Lobos-Tsunekawa --- .../CMakeLists.txt | 1 + .../updater/binary_bayes_filter_updater_kernel.hpp | 2 +- .../updater/log_odds_bayes_filter_updater_kernel.hpp | 1 + .../utils/cuda_pointcloud.hpp | 1 + .../lib/updater/binary_bayes_filter_updater_kernel.cu | 8 ++++---- .../lib/updater/log_odds_bayes_filter_updater_kernel.cu | 1 + 6 files changed, 9 insertions(+), 5 deletions(-) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt b/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt index 3275b2db7a68f..5bf7a4ef4a633 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt +++ b/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt @@ -22,6 +22,7 @@ if(NOT ${CUDA_FOUND}) return() endif() +# cSpell: ignore expt list(APPEND CUDA_NVCC_FLAGS --expt-relaxed-constexpr -diag-suppress 20012 --compiler-options -fPIC) set(CUDA_SEPARABLE_COMPILATION ON) set(CMAKE_POSITION_INDEPENDENT_CODE ON) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/binary_bayes_filter_updater_kernel.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/binary_bayes_filter_updater_kernel.hpp index 78056c604a427..6aee755e20815 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/binary_bayes_filter_updater_kernel.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/binary_bayes_filter_updater_kernel.hpp @@ -29,7 +29,7 @@ namespace costmap_2d void applyBBFLaunch( const std::uint8_t * z_costmap, const float * probability_matrix, const int num_states, const int free_index, const int occupied_index, const std::uint8_t free_space_value, - const std::uint8_t lethal_ostacle_value, const std::uint8_t no_information_value, + const std::uint8_t lethal_obstacle_value, const std::uint8_t no_information_value, const double v_ratio_, const int num_elements, std::uint8_t * o_costmap, cudaStream_t stream); } // namespace costmap_2d diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/log_odds_bayes_filter_updater_kernel.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/log_odds_bayes_filter_updater_kernel.hpp index d61cf4f241662..8c90649b0c4e0 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/log_odds_bayes_filter_updater_kernel.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/log_odds_bayes_filter_updater_kernel.hpp @@ -26,6 +26,7 @@ namespace autoware::occupancy_grid_map namespace costmap_2d { +// cspell: ignore LOBF void applyLOBFLaunch( const std::uint8_t * z_costmap, const std::uint8_t no_information_value, const int num_elements, std::uint8_t * o_costmap, cudaStream_t stream); diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp index 247787d341ed6..541eb579e4de0 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp @@ -26,6 +26,7 @@ class CudaPointCloud2 : public sensor_msgs::msg::PointCloud2 public: void fromROSMsg(const sensor_msgs::msg::PointCloud2 & msg) { + // cSpell: knzo25 // NOTE(knzo25): replace this with the cuda blackboard later header = msg.header; fields = msg.fields; diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater_kernel.cu b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater_kernel.cu index ef809372c31c0..ce18e10b19605 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater_kernel.cu +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater_kernel.cu @@ -22,7 +22,7 @@ __global__ void applyBBFKernel( const std::uint8_t * z_costmap, const float * probability_matrix, const int num_states, const int free_index, const int occupied_index, const std::uint8_t free_space_value, - const std::uint8_t lethal_ostacle_value, const std::uint8_t no_information_value, + const std::uint8_t lethal_obstacle_value, const std::uint8_t no_information_value, const double v_ratio_, const int num_elements, std::uint8_t * o_costmap) { const int idx = blockIdx.x * blockDim.x + threadIdx.x; @@ -38,7 +38,7 @@ __global__ void applyBBFKernel( float pz{}; float not_pz{}; float po_hat{}; - if (z == lethal_ostacle_value) { + if (z == lethal_obstacle_value) { pz = probability_matrix[occupied_index * num_states + occupied_index]; not_pz = probability_matrix[free_index * num_states + occupied_index]; po_hat = ((po * pz) / ((po * pz) + ((1.f - po) * not_pz))); @@ -65,14 +65,14 @@ namespace costmap_2d void applyBBFLaunch( const std::uint8_t * z_costmap, const float * probability_matrix, const int num_states, const int free_index, const int occupied_index, const std::uint8_t free_space_value, - const std::uint8_t lethal_ostacle_value, const std::uint8_t no_information_value, + const std::uint8_t lethal_obstacle_value, const std::uint8_t no_information_value, const double v_ratio_, const int num_elements, std::uint8_t * o_costmap, cudaStream_t stream) { const int num_threads = 256; const int num_blocks = (num_elements + num_threads - 1) / num_threads; applyBBFKernel<<>>( z_costmap, probability_matrix, num_states, free_index, occupied_index, free_space_value, - lethal_ostacle_value, no_information_value, v_ratio_, num_elements, o_costmap); + lethal_obstacle_value, no_information_value, v_ratio_, num_elements, o_costmap); } } // namespace costmap_2d diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu index 068bc41d0c8d2..287903c9b6b5e 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu @@ -49,6 +49,7 @@ __host__ __device__ __forceinline__ double logOddsFusion(const double p1, const return 1.0 / (1.0 + std::exp(-log_odds)); } +// cspell: ignore LOBF __global__ void applyLOBFKernel( const std::uint8_t * z_costmap, const std::uint8_t unknown_value, const int num_elements, std::uint8_t * o_costmap) From 0b37e18147a35bbee014acb93c0c3132f2ae7409 Mon Sep 17 00:00:00 2001 From: Kenzo Lobos-Tsunekawa Date: Tue, 3 Dec 2024 14:43:58 +0900 Subject: [PATCH 03/19] chore: unused header and variable names Signed-off-by: Kenzo Lobos-Tsunekawa --- .../binary_bayes_filter_updater_kernel.hpp | 2 -- .../log_odds_bayes_filter_updater_kernel.hpp | 2 -- .../occupancy_grid_map_projective_kernel.cu | 30 +++++++++---------- .../binary_bayes_filter_updater_kernel.cu | 6 ++-- .../log_odds_bayes_filter_updater_kernel.cu | 6 ++-- .../lib/utils/utils_kernel.cu | 13 ++++---- 6 files changed, 28 insertions(+), 31 deletions(-) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/binary_bayes_filter_updater_kernel.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/binary_bayes_filter_updater_kernel.hpp index 6aee755e20815..feef6f2815a0f 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/binary_bayes_filter_updater_kernel.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/binary_bayes_filter_updater_kernel.hpp @@ -15,8 +15,6 @@ #ifndef AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__UPDATER__BINARY_BAYES_FILTER_UPDATER_KERNEL_HPP_ #define AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__UPDATER__BINARY_BAYES_FILTER_UPDATER_KERNEL_HPP_ -#include "autoware/cuda_utils/cuda_unique_ptr.hpp" - #include #include diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/log_odds_bayes_filter_updater_kernel.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/log_odds_bayes_filter_updater_kernel.hpp index 8c90649b0c4e0..c8ff015ca27a2 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/log_odds_bayes_filter_updater_kernel.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/updater/log_odds_bayes_filter_updater_kernel.hpp @@ -15,8 +15,6 @@ #ifndef AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__UPDATER__LOG_ODDS_BAYES_FILTER_UPDATER_KERNEL_HPP_ #define AUTOWARE__PROBABILISTIC_OCCUPANCY_GRID_MAP__UPDATER__LOG_ODDS_BAYES_FILTER_UPDATER_KERNEL_HPP_ -#include "autoware/cuda_utils/cuda_unique_ptr.hpp" - #include #include diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_projective_kernel.cu b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_projective_kernel.cu index d9e5e14b8506d..d8f68c4f4cba3 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_projective_kernel.cu +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_projective_kernel.cu @@ -459,10 +459,10 @@ void prepareRawTensorLaunch( const Eigen::Vector3f * translation_map, const Eigen::Matrix3f * rotation_scan, const Eigen::Vector3f * translation_scan, std::uint64_t * points_tensor, cudaStream_t stream) { - const int threadsPerBlock = 256; - const int blocksPerGrid = (num_points + threadsPerBlock - 1) / threadsPerBlock; + const int threads_per_block = 256; + const int num_blocks = (num_points + threads_per_block - 1) / threads_per_block; - prepareRawTensorKernel<<>>( + prepareRawTensorKernel<<>>( input_pointcloud, num_points, points_step, angle_bins, range_bins, min_height, max_height, min_angle, angle_increment_inv, range_resolution_inv, rotation_map, translation_map, rotation_scan, translation_scan, points_tensor); @@ -477,10 +477,10 @@ void prepareObstacleTensorLaunch( const Eigen::Vector3f * translation_map, const Eigen::Matrix3f * rotation_scan, const Eigen::Vector3f * translation_scan, std::uint64_t * points_tensor, cudaStream_t stream) { - const int threadsPerBlock = 256; - const int blocksPerGrid = (num_points + threadsPerBlock - 1) / threadsPerBlock; + const int threads_per_block = 256; + const int num_blocks = (num_points + threads_per_block - 1) / threads_per_block; - prepareObstacleTensorKernel<<>>( + prepareObstacleTensorKernel<<>>( input_pointcloud, num_points, points_step, angle_bins, range_bins, min_height, max_height, min_angle, angle_increment_inv, range_resolution_inv, projection_dz_threshold, translation_scan_origin, rotation_map, translation_map, rotation_scan, translation_scan, @@ -493,10 +493,10 @@ void fillEmptySpaceLaunch( const float map_origin_x, const float map_origin_y, const int num_cells_x, const int num_cells_y, std::uint8_t empty_value, std::uint8_t * costmap_tensor, cudaStream_t stream) { - const int threadsPerBlock = 256; - const int blocksPerGrid = (angle_bins * range_bins + threadsPerBlock - 1) / threadsPerBlock; + const int threads_per_block = 256; + const int num_blocks = (angle_bins * range_bins + threads_per_block - 1) / threads_per_block; - fillEmptySpaceKernel<<>>( + fillEmptySpaceKernel<<>>( points_tensor, angle_bins, range_bins, map_resolution_inv, scan_origin_x, scan_origin_y, map_origin_x, map_origin_y, num_cells_x, num_cells_y, empty_value, costmap_tensor); } @@ -510,10 +510,10 @@ void fillUnknownSpaceLaunch( std::uint8_t free_space_value, std::uint8_t no_information_value, std::uint8_t * costmap_tensor, cudaStream_t stream) { - const int threadsPerBlock = 256; - const int blocksPerGrid = (angle_bins * range_bins + threadsPerBlock - 1) / threadsPerBlock; + const int threads_per_block = 256; + const int num_blocks = (angle_bins * range_bins + threads_per_block - 1) / threads_per_block; - fillUnknownSpaceKernel<<>>( + fillUnknownSpaceKernel<<>>( raw_points_tensor, obstacle_points_tensor, obstacle_separation_threshold, angle_bins, range_bins, map_resolution_inv, scan_origin_x, scan_origin_y, scan_origin_z, map_origin_x, map_origin_y, robot_pose_z, num_cells_x, num_cells_y, free_space_value, no_information_value, @@ -526,10 +526,10 @@ void fillObstaclesLaunch( const float map_origin_x, const float map_origin_y, const int num_cells_x, const int num_cells_y, std::uint8_t obstacle_value, std::uint8_t * costmap_tensor, cudaStream_t stream) { - const int threadsPerBlock = 256; - const int blocksPerGrid = (angle_bins * range_bins + threadsPerBlock - 1) / threadsPerBlock; + const int threads_per_block = 256; + const int num_blocks = (angle_bins * range_bins + threads_per_block - 1) / threads_per_block; - fillObstaclesKernel<<>>( + fillObstaclesKernel<<>>( obstacle_points_tensor, distance_margin, angle_bins, range_bins, map_resolution_inv, map_origin_x, map_origin_y, num_cells_x, num_cells_y, obstacle_value, costmap_tensor); } diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater_kernel.cu b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater_kernel.cu index ce18e10b19605..f4d4886eb0182 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater_kernel.cu +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater_kernel.cu @@ -68,9 +68,9 @@ void applyBBFLaunch( const std::uint8_t lethal_obstacle_value, const std::uint8_t no_information_value, const double v_ratio_, const int num_elements, std::uint8_t * o_costmap, cudaStream_t stream) { - const int num_threads = 256; - const int num_blocks = (num_elements + num_threads - 1) / num_threads; - applyBBFKernel<<>>( + const int threads_per_block = 256; + const int num_blocks = (num_elements + threads_per_block - 1) / threads_per_block; + applyBBFKernel<<>>( z_costmap, probability_matrix, num_states, free_index, occupied_index, free_space_value, lethal_obstacle_value, no_information_value, v_ratio_, num_elements, o_costmap); } diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu index 287903c9b6b5e..74848bcd7b23e 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu @@ -82,9 +82,9 @@ void applyLOBFLaunch( const std::uint8_t * z_costmap, const std::uint8_t no_information_value, const int num_elements, std::uint8_t * o_costmap, cudaStream_t stream) { - const int num_threads = 256; - const int num_blocks = (num_elements + num_threads - 1) / num_threads; - applyLOBFKernel<<>>( + const int threads_per_block = 256; + const int num_blocks = (num_elements + threads_per_block - 1) / threads_per_block; + applyLOBFKernel<<>>( z_costmap, no_information_value, num_elements, o_costmap); } diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils_kernel.cu b/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils_kernel.cu index da2ee86a3d84f..4e88a7dc6b95f 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils_kernel.cu +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils_kernel.cu @@ -271,9 +271,9 @@ void transformPointCloudLaunch( const Eigen::Matrix3f & rotation, const Eigen::Vector3f & translation) { // Launch kernel - int block_size = 256; - int num_blocks = (num_points + block_size - 1) / block_size; - transformPointCloudKernel<<>>( + int threads_per_block = 256; + int num_blocks = (num_points + threads_per_block - 1) / threads_per_block; + transformPointCloudKernel<<>>( points, num_points, points_step, rotation, translation); } @@ -283,10 +283,11 @@ void copyMapRegionLaunch( unsigned int dm_lower_left_y, unsigned int dm_size_x, unsigned int region_size_x, unsigned int region_size_y, cudaStream_t stream) { - const int threadsPerBlock = 256; - const int blocksPerGrid = (region_size_x * region_size_y + threadsPerBlock - 1) / threadsPerBlock; + const int threads_per_block = 256; + const int num_blocks = + (region_size_x * region_size_y + threads_per_block - 1) / threads_per_block; - copyMapRegionKernel<<>>( + copyMapRegionKernel<<>>( source_map, sm_lower_left_x, sm_lower_left_y, sm_size_x, dest_map, dm_lower_left_x, dm_lower_left_y, dm_size_x, region_size_x, region_size_y); } From abca6e30061fbd2c0bd4b6f2980a7fccdd204a32 Mon Sep 17 00:00:00 2001 From: Kenzo Lobos-Tsunekawa Date: Tue, 3 Dec 2024 14:57:10 +0900 Subject: [PATCH 04/19] chore: cspell fixes Signed-off-by: Kenzo Lobos-Tsunekawa --- .../utils/cuda_pointcloud.hpp | 2 +- .../lib/costmap_2d/occupancy_grid_map_projective_kernel.cu | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp index 541eb579e4de0..4449cea5b1293 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp @@ -26,7 +26,7 @@ class CudaPointCloud2 : public sensor_msgs::msg::PointCloud2 public: void fromROSMsg(const sensor_msgs::msg::PointCloud2 & msg) { - // cSpell: knzo25 + // cSpell: ignore knzo25 // NOTE(knzo25): replace this with the cuda blackboard later header = msg.header; fields = msg.fields; diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_projective_kernel.cu b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_projective_kernel.cu index d8f68c4f4cba3..975a08c9d41b8 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_projective_kernel.cu +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_projective_kernel.cu @@ -316,11 +316,7 @@ __global__ void fillUnknownSpaceKernel( const float next_obs_world_px = __uint_as_float(next_obs_range_and_px & 0xFFFFFFFF); const float next_obs_world_py = __uint_as_float(next_obs_range_and_py & 0xFFFFFFFF); - float next_obs_range = - next_obs_range_int * - RANGE_DISCRETIZATION_RESOLUTION; // TODO(knzo25): replace all RANGE_DISCRETIZATION_RESOLUTION - // (1mm) instances with a constant or parameter. handle the - // unlikely overflow + float next_obs_range = next_obs_range_int * RANGE_DISCRETIZATION_RESOLUTION; float obs_to_obs_distance = next_obs_range - obs_range; const std::uint64_t next_raw_range_and_x = From 43d79eb701e9c571b964fa2abe7df2ac6dd224e4 Mon Sep 17 00:00:00 2001 From: Kenzo Lobos-Tsunekawa Date: Tue, 3 Dec 2024 15:05:27 +0900 Subject: [PATCH 05/19] chore: cppcheck fix attempt Signed-off-by: Kenzo Lobos-Tsunekawa --- .../src/fusion/synchronized_grid_map_fusion_node.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/src/fusion/synchronized_grid_map_fusion_node.cpp b/perception/autoware_probabilistic_occupancy_grid_map/src/fusion/synchronized_grid_map_fusion_node.cpp index 31cc1aa18c1b5..4829abd80a38a 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/src/fusion/synchronized_grid_map_fusion_node.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/src/fusion/synchronized_grid_map_fusion_node.cpp @@ -367,9 +367,6 @@ const OccupancyGridMapFixedBlindSpot & GridMapFusionNode::SingleFrameOccupancyFu if (time_keeper_) inner_st_ptr = std::make_unique("create_fused_map", *time_keeper_); - // init fused map with calculated origin - fused_map_->resetMaps(); - if ( !fused_map_ || fused_map_->getSizeInCellsX() != occupancy_grid_maps[0].getSizeInCellsX() || fused_map_->getSizeInCellsY() != occupancy_grid_maps[0].getSizeInCellsY() || @@ -379,6 +376,9 @@ const OccupancyGridMapFixedBlindSpot & GridMapFusionNode::SingleFrameOccupancyFu occupancy_grid_maps[0].getResolution()); } + // init fused map with calculated origin + fused_map_->resetMaps(); + fused_map_->updateOrigin( gridmap_origin.position.x - fused_map_->getSizeInMetersX() / 2, gridmap_origin.position.y - fused_map_->getSizeInMetersY() / 2); From 7905835dabef69d21d0e839d5a3a6842fcf5d242 Mon Sep 17 00:00:00 2001 From: Kenzo Lobos-Tsunekawa Date: Tue, 3 Dec 2024 15:48:32 +0900 Subject: [PATCH 06/19] fix: attempting to fix ci/cd regarding the cuda library Signed-off-by: Kenzo Lobos-Tsunekawa --- .../CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt b/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt index 5bf7a4ef4a633..6342ada0b976a 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt +++ b/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt @@ -97,6 +97,11 @@ rclcpp_components_register_node(synchronized_grid_map_fusion EXECUTABLE synchronized_grid_map_fusion_node ) +install( + TARGETS ${PROJECT_NAME}_cuda + DESTINATION lib +) + ament_auto_package( INSTALL_TO_SHARE launch @@ -124,6 +129,7 @@ if(BUILD_TESTING) target_link_libraries(test_utils ${PCL_LIBRARIES} ${PROJECT_NAME}_common + ${PROJECT_NAME}_cuda ) target_include_directories(costmap_unit_tests PRIVATE "include") target_include_directories(fusion_policy_unit_tests PRIVATE "include") From 83871b050988bd77c0208e63494a01731a611940 Mon Sep 17 00:00:00 2001 From: Kenzo Lobos-Tsunekawa Date: Wed, 4 Dec 2024 09:26:15 +0900 Subject: [PATCH 07/19] chore: fixed the order of the cuda check in the cmakelist Signed-off-by: Kenzo Lobos-Tsunekawa --- .../CMakeLists.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt b/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt index 6342ada0b976a..9e6881a5e3a75 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt +++ b/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt @@ -9,6 +9,11 @@ find_package(eigen3_cmake_module REQUIRED) find_package(Eigen3 REQUIRED) find_package(PCL REQUIRED) +if(NOT ${CUDA_FOUND}) + message(WARNING "cuda was not found, so the autoware_probabilistic_occupancy_grid_map package will not be built.") + return() +endif() + include_directories( include SYSTEM @@ -17,11 +22,6 @@ include_directories( ${grid_map_ros_INCLUDE_DIRS} ) -if(NOT ${CUDA_FOUND}) - message(WARNING "cuda was not found, so the autoware_probabilistic_occupancy_grid_map package will not be built.") - return() -endif() - # cSpell: ignore expt list(APPEND CUDA_NVCC_FLAGS --expt-relaxed-constexpr -diag-suppress 20012 --compiler-options -fPIC) set(CUDA_SEPARABLE_COMPILATION ON) From 9b58df0b3e3300e3a1debedaca512c5606414266 Mon Sep 17 00:00:00 2001 From: Kenzo Lobos-Tsunekawa Date: Wed, 4 Dec 2024 09:40:33 +0900 Subject: [PATCH 08/19] fix: removed cuda as a required dep for cpu only builds Signed-off-by: Kenzo Lobos-Tsunekawa --- .../autoware_probabilistic_occupancy_grid_map/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt b/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt index 9e6881a5e3a75..58941c72fa0ad 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt +++ b/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt @@ -4,7 +4,7 @@ project(autoware_probabilistic_occupancy_grid_map) find_package(autoware_cmake REQUIRED) autoware_package() -find_package(CUDA REQUIRED) +find_package(CUDA) find_package(eigen3_cmake_module REQUIRED) find_package(Eigen3 REQUIRED) find_package(PCL REQUIRED) From 5bfa282ea365c08dafbc9869502120de5e770eec Mon Sep 17 00:00:00 2001 From: Kenzo Lobos-Tsunekawa Date: Wed, 4 Dec 2024 10:49:09 +0900 Subject: [PATCH 09/19] fix: missing cuda linking (?) Signed-off-by: Kenzo Lobos-Tsunekawa --- .../autoware_probabilistic_occupancy_grid_map/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt b/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt index 58941c72fa0ad..04415f898e453 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt +++ b/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt @@ -35,6 +35,10 @@ cuda_add_library(${PROJECT_NAME}_cuda SHARED lib/utils/utils_kernel.cu ) +target_link_libraries(${PROJECT_NAME}_cuda + ${CUDA_LIBRARIES} +) + ament_auto_add_library(${PROJECT_NAME}_common SHARED lib/costmap_2d/occupancy_grid_map_base.cpp lib/updater/binary_bayes_filter_updater.cpp @@ -127,6 +131,7 @@ if(BUILD_TESTING) lib/fusion_policy/fusion_policy.cpp ) target_link_libraries(test_utils + ${CUDA_LIBRARIES} ${PCL_LIBRARIES} ${PROJECT_NAME}_common ${PROJECT_NAME}_cuda From 02f8beb43dc216cca3d98d69ca1304df0ff831f2 Mon Sep 17 00:00:00 2001 From: Kenzo Lobos-Tsunekawa Date: Thu, 23 Jan 2025 20:04:22 +0900 Subject: [PATCH 10/19] feat: fixed single mode, added streams, and added the restrict keyword Signed-off-by: Kenzo Lobos-Tsunekawa --- .../costmap_2d/occupancy_grid_map_base.hpp | 2 + .../utils/cuda_pointcloud.hpp | 5 ++- .../utils/utils.hpp | 2 +- .../utils/utils_kernel.hpp | 2 +- .../costmap_2d/occupancy_grid_map_base.cpp | 7 ++++ .../costmap_2d/occupancy_grid_map_fixed.cpp | 16 +++++--- .../occupancy_grid_map_fixed_kernel.cu | 39 +++++++++++-------- .../lib/utils/utils.cpp | 5 ++- .../lib/utils/utils_kernel.cu | 4 +- ...intcloud_based_occupancy_grid_map_node.cpp | 13 +++++-- 10 files changed, 60 insertions(+), 35 deletions(-) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_base.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_base.hpp index 8272dda967114..c77345a973c50 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_base.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_base.hpp @@ -104,6 +104,8 @@ class OccupancyGridMapInterface : public nav2_costmap_2d::Costmap2D const autoware::cuda_utils::CudaUniquePtr & getDeviceCostmap() const; + void copyDeviceCostmapToHost() const; + protected: rclcpp::Logger logger_{rclcpp::get_logger("pointcloud_based_occupancy_grid_map")}; rclcpp::Clock clock_{RCL_ROS_TIME}; diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp index 4449cea5b1293..8c78e2ad915b5 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp @@ -24,7 +24,7 @@ class CudaPointCloud2 : public sensor_msgs::msg::PointCloud2 { public: - void fromROSMsg(const sensor_msgs::msg::PointCloud2 & msg) + void fromROSMsgAsync(const sensor_msgs::msg::PointCloud2 & msg) { // cSpell: ignore knzo25 // NOTE(knzo25): replace this with the cuda blackboard later @@ -42,10 +42,11 @@ class CudaPointCloud2 : public sensor_msgs::msg::PointCloud2 data = autoware::cuda_utils::make_unique(capacity_); } - cudaMemcpy(data.get(), msg.data.data(), msg.data.size(), cudaMemcpyHostToDevice); + cudaMemcpyAsync(data.get(), msg.data.data(), msg.data.size(), cudaMemcpyHostToDevice, stream); } autoware::cuda_utils::CudaUniquePtr data; + cudaStream_t stream; private: std::size_t capacity_{0}; diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/utils.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/utils.hpp index d8e8451d8788f..9a9a441af3d60 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/utils.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/utils.hpp @@ -56,7 +56,7 @@ bool transformPointcloud( const sensor_msgs::msg::PointCloud2 & input, const tf2_ros::Buffer & tf2, const std::string & target_frame, sensor_msgs::msg::PointCloud2 & output); -bool transformPointcloud( +bool transformPointcloudAsync( CudaPointCloud2 & input, const tf2_ros::Buffer & tf2, const std::string & target_frame); Eigen::Matrix4f getTransformMatrix(const geometry_msgs::msg::Pose & pose); diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/utils_kernel.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/utils_kernel.hpp index c74424f900b91..2ec06c23066a3 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/utils_kernel.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/utils_kernel.hpp @@ -43,7 +43,7 @@ void copyMapRegionLaunch( void transformPointCloudLaunch( std::uint8_t * points, std::size_t num_points, std::size_t points_step, - const Eigen::Matrix3f & rotation, const Eigen::Vector3f & translation); + const Eigen::Matrix3f & rotation, const Eigen::Vector3f & translation, cudaStream_t stream); } // namespace utils } // namespace autoware::occupancy_grid_map diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp index 1b242f22691cf..d0453fd6255ed 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp @@ -202,5 +202,12 @@ OccupancyGridMapInterface::getDeviceCostmap() const return device_costmap_; } +void OccupancyGridMapInterface::copyDeviceCostmapToHost() const +{ + cudaMemcpy( + costmap_, device_costmap_.get(), getSizeInCellsX() * getSizeInCellsY() * sizeof(std::uint8_t), + cudaMemcpyDeviceToHost); +} + } // namespace costmap_2d } // namespace autoware::occupancy_grid_map diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed.cpp b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed.cpp index 53c96ec5503cd..97f36f08c48bd 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed.cpp @@ -125,12 +125,14 @@ void OccupancyGridMapFixedBlindSpot::updateWithPointCloud( const std::size_t num_raw_points = raw_pointcloud.width * raw_pointcloud.height; float range_resolution_inv = 1.0 / map_res; + cudaStreamSynchronize(stream_); + map_fixed::prepareTensorLaunch( reinterpret_cast(raw_pointcloud.data.get()), num_raw_points, raw_pointcloud.point_step / sizeof(float), angle_bin_size, range_bin_size, min_height_, max_height_, min_angle_, angle_increment_inv_, range_resolution_inv, device_rotation_map_.get(), device_translation_map_.get(), device_rotation_scan_.get(), device_translation_scan_.get(), - raw_points_tensor_.get(), stream_); + raw_points_tensor_.get(), raw_pointcloud.stream); const std::size_t num_obstacle_points = obstacle_pointcloud.width * obstacle_pointcloud.height; @@ -139,25 +141,27 @@ void OccupancyGridMapFixedBlindSpot::updateWithPointCloud( obstacle_pointcloud.point_step / sizeof(float), angle_bin_size, range_bin_size, min_height_, max_height_, min_angle_, angle_increment_inv_, range_resolution_inv, device_rotation_map_.get(), device_translation_map_.get(), device_rotation_scan_.get(), device_translation_scan_.get(), - obstacle_points_tensor_.get(), stream_); + obstacle_points_tensor_.get(), obstacle_pointcloud.stream); map_fixed::fillEmptySpaceLaunch( raw_points_tensor_.get(), angle_bin_size, range_bin_size, range_resolution_inv, scan_origin.position.x, scan_origin.position.y, origin_x_, origin_y_, num_cells_x, num_cells_y, - cost_value::FREE_SPACE, device_costmap_.get(), stream_); + cost_value::FREE_SPACE, device_costmap_.get(), raw_pointcloud.stream); + + cudaStreamSynchronize(obstacle_pointcloud.stream); map_fixed::fillUnknownSpaceLaunch( raw_points_tensor_.get(), obstacle_points_tensor_.get(), distance_margin_, angle_bin_size, range_bin_size, range_resolution_inv, scan_origin.position.x, scan_origin.position.y, origin_x_, origin_y_, num_cells_x, num_cells_y, cost_value::FREE_SPACE, cost_value::NO_INFORMATION, - device_costmap_.get(), stream_); + device_costmap_.get(), raw_pointcloud.stream); map_fixed::fillObstaclesLaunch( obstacle_points_tensor_.get(), distance_margin_, angle_bin_size, range_bin_size, range_resolution_inv, origin_x_, origin_y_, num_cells_x, num_cells_y, - cost_value::LETHAL_OBSTACLE, device_costmap_.get(), stream_); + cost_value::LETHAL_OBSTACLE, device_costmap_.get(), raw_pointcloud.stream); - cudaStreamSynchronize(stream_); + cudaStreamSynchronize(raw_pointcloud.stream); } void OccupancyGridMapFixedBlindSpot::initRosParam(rclcpp::Node & node) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed_kernel.cu b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed_kernel.cu index 6d9be7a97da50..3614e025875a3 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed_kernel.cu +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed_kernel.cu @@ -29,12 +29,14 @@ namespace costmap_2d::map_fixed static constexpr float RANGE_DISCRETIZATION_RESOLUTION = 0.001f; __global__ void prepareTensorKernel( - const float * input_pointcloud, const std::size_t num_points, const std::size_t points_step, - const std::size_t angle_bins, const std::size_t range_bins, const float min_height, - const float max_height, const float min_angle, const float angle_increment_inv, - const float range_resolution_inv, const Eigen::Matrix3f * rotation_map, - const Eigen::Vector3f * translation_map, const Eigen::Matrix3f * rotation_scan, - const Eigen::Vector3f * translation_scan, std::uint64_t * points_tensor) + const float * __restrict__ input_pointcloud, const std::size_t num_points, + const std::size_t points_step, const std::size_t angle_bins, const std::size_t range_bins, + const float min_height, const float max_height, const float min_angle, + const float angle_increment_inv, const float range_resolution_inv, + const Eigen::Matrix3f * __restrict__ rotation_map, + const Eigen::Vector3f * __restrict__ translation_map, + const Eigen::Matrix3f * __restrict__ rotation_scan, + const Eigen::Vector3f * __restrict__ translation_scan, std::uint64_t * __restrict__ points_tensor) { int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx >= num_points) { @@ -82,10 +84,11 @@ __global__ void prepareTensorKernel( } __global__ void fillEmptySpaceKernel( - const std::uint64_t * points_tensor, const std::size_t angle_bins, const std::size_t range_bins, - const float map_resolution_inv, const float scan_origin_x, const float scan_origin_y, - const float map_origin_x, const float map_origin_y, const int num_cells_x, const int num_cells_y, - std::uint8_t empty_value, std::uint8_t * costmap_tensor) + const std::uint64_t * __restrict__ points_tensor, const std::size_t angle_bins, + const std::size_t range_bins, const float map_resolution_inv, const float scan_origin_x, + const float scan_origin_y, const float map_origin_x, const float map_origin_y, + const int num_cells_x, const int num_cells_y, std::uint8_t empty_value, + std::uint8_t * __restrict__ costmap_tensor) { int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx >= angle_bins * range_bins) return; @@ -116,11 +119,13 @@ __global__ void fillEmptySpaceKernel( } __global__ void fillUnknownSpaceKernel( - const std::uint64_t * raw_points_tensor, const std::uint64_t * obstacle_points_tensor, - const float distance_margin, const std::size_t angle_bins, const std::size_t range_bins, - const float map_resolution_inv, const float scan_origin_x, const float scan_origin_y, - const float map_origin_x, const float map_origin_y, const int num_cells_x, const int num_cells_y, - std::uint8_t free_space_value, std::uint8_t no_information_value, std::uint8_t * costmap_tensor) + const std::uint64_t * __restrict__ raw_points_tensor, + const std::uint64_t * __restrict__ obstacle_points_tensor, const float distance_margin, + const std::size_t angle_bins, const std::size_t range_bins, const float map_resolution_inv, + const float scan_origin_x, const float scan_origin_y, const float map_origin_x, + const float map_origin_y, const int num_cells_x, const int num_cells_y, + std::uint8_t free_space_value, std::uint8_t no_information_value, + std::uint8_t * __restrict__ costmap_tensor) { int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx >= angle_bins * range_bins) return; @@ -253,10 +258,10 @@ __global__ void fillUnknownSpaceKernel( } __global__ void fillObstaclesKernel( - const std::uint64_t * obstacle_points_tensor, const float distance_margin, + const std::uint64_t * __restrict__ obstacle_points_tensor, const float distance_margin, const std::size_t angle_bins, const std::size_t range_bins, const float map_resolution_inv, const float map_origin_x, const float map_origin_y, const int num_cells_x, const int num_cells_y, - std::uint8_t obstacle_value, std::uint8_t * costmap_tensor) + std::uint8_t obstacle_value, std::uint8_t * __restrict__ costmap_tensor) { int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx >= angle_bins * range_bins) return; diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils.cpp b/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils.cpp index c57bd0e081af7..6ed51f9fccebc 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils.cpp @@ -47,7 +47,7 @@ bool transformPointcloud( return true; } -bool transformPointcloud( +bool transformPointcloudAsync( CudaPointCloud2 & input, const tf2_ros::Buffer & tf2, const std::string & target_frame) { geometry_msgs::msg::TransformStamped tf_stamped; @@ -66,7 +66,8 @@ bool transformPointcloud( Eigen::Matrix3f rotation = tf_matrix.block<3, 3>(0, 0); Eigen::Vector3f translation = tf_matrix.block<3, 1>(0, 3); transformPointCloudLaunch( - input.data.get(), input.width * input.height, input.point_step, rotation, translation); + input.data.get(), input.width * input.height, input.point_step, rotation, translation, + input.stream); input.header.frame_id = target_frame; return true; } diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils_kernel.cu b/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils_kernel.cu index 4e88a7dc6b95f..50b4acbf9254d 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils_kernel.cu +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils_kernel.cu @@ -268,12 +268,12 @@ __global__ void copyMapRegionKernel( void transformPointCloudLaunch( std::uint8_t * points, std::size_t num_points, std::size_t points_step, - const Eigen::Matrix3f & rotation, const Eigen::Vector3f & translation) + const Eigen::Matrix3f & rotation, const Eigen::Vector3f & translation, cudaStream_t stream) { // Launch kernel int threads_per_block = 256; int num_blocks = (num_points + threads_per_block - 1) / threads_per_block; - transformPointCloudKernel<<>>( + transformPointCloudKernel<<>>( points, num_points, points_step, rotation, translation); } diff --git a/perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.cpp b/perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.cpp index cf169e6005c82..06b46f5cee2ec 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.cpp @@ -140,6 +140,9 @@ PointcloudBasedOccupancyGridMapNode::PointcloudBasedOccupancyGridMapNode( time_keeper_ = std::make_shared(time_keeper); } } + + cudaStreamCreate(&raw_pointcloud_.stream); + cudaStreamCreate(&obstacle_pointcloud_.stream); } void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw( @@ -153,8 +156,8 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw( stop_watch_ptr_->toc("processing_time", true); } - raw_pointcloud_.fromROSMsg(*input_raw_msg); - obstacle_pointcloud_.fromROSMsg(*input_obstacle_msg); + raw_pointcloud_.fromROSMsgAsync(*input_raw_msg); + obstacle_pointcloud_.fromROSMsgAsync(*input_obstacle_msg); // if scan_origin_frame_ is "", replace it with raw_pointcloud_.header.frame_id if (scan_origin_frame_.empty()) { @@ -165,12 +168,12 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw( if (use_height_filter_) { // Make sure that the frame is base_link if (raw_pointcloud_.header.frame_id != base_link_frame_) { - if (!utils::transformPointcloud(raw_pointcloud_, *tf2_, base_link_frame_)) { + if (!utils::transformPointcloudAsync(raw_pointcloud_, *tf2_, base_link_frame_)) { return; } } if (input_obstacle_msg->header.frame_id != base_link_frame_) { - if (!utils::transformPointcloud(obstacle_pointcloud_, *tf2_, base_link_frame_)) { + if (!utils::transformPointcloudAsync(obstacle_pointcloud_, *tf2_, base_link_frame_)) { return; } } @@ -214,6 +217,8 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw( if (time_keeper_) inner_st_ptr = std::make_unique("publish_occupancy_grid_map", *time_keeper_); + occupancy_grid_map_ptr_->copyDeviceCostmapToHost(); + // publish occupancy_grid_map_pub_->publish(OccupancyGridMapToMsgPtr( map_frame_, input_raw_msg->header.stamp, robot_pose.position.z, From 5c14fc06f8c34cc1f9b27b7dd61ec746cdcce9ce Mon Sep 17 00:00:00 2001 From: Kenzo Lobos-Tsunekawa Date: Thu, 23 Jan 2025 20:10:31 +0900 Subject: [PATCH 11/19] chore: replaced a potential indetermination using an epsilon Signed-off-by: Kenzo Lobos-Tsunekawa --- .../lib/utils/utils_kernel.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils_kernel.cu b/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils_kernel.cu index 50b4acbf9254d..5f03e4360465e 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils_kernel.cu +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils_kernel.cu @@ -150,7 +150,8 @@ inline __device__ void raytraceLine( int offset_dx = dx > 0 ? 1 : -1; // sign(dx); int offset_dy = dy > 0 ? size_x : -size_x; // sign(dy) * size_x; - float scale = (dist == 0.0) ? 1.0 : min(1.f, max_length / dist); + constexpr float epsilon = 1e-6; + float scale = (dist < epsilon) ? 1.0 : min(1.f, max_length / dist); // if x is dominant if (abs_dx >= abs_dy) { int error_y = abs_dx / 2; From 292550df0148e49150a07c28d39ada8f4824e648 Mon Sep 17 00:00:00 2001 From: Kenzo Lobos Tsunekawa Date: Thu, 23 Jan 2025 20:39:25 +0900 Subject: [PATCH 12/19] Update perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu Co-authored-by: Yoshi Ri --- .../lib/updater/log_odds_bayes_filter_updater_kernel.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu index 74848bcd7b23e..ba96ea0599745 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu @@ -33,7 +33,7 @@ __host__ __device__ __forceinline__ double convertCharToProbability(const std::u __host__ __device__ __forceinline__ std::uint8_t convertProbabilityToChar(const double value) { - return static_cast(value * 255.0); + return static_cast(std::max(0.0, std::min(1.0, value)) * 255.0);; } __host__ __device__ __forceinline__ double logOddsFusion(const double p1, const double p2) From 4ef247ae22ef381e1edbf5fef43b66902fb514c4 Mon Sep 17 00:00:00 2001 From: Kenzo Lobos Tsunekawa Date: Thu, 23 Jan 2025 20:41:46 +0900 Subject: [PATCH 13/19] Update perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu Co-authored-by: Yoshi Ri --- .../lib/updater/log_odds_bayes_filter_updater_kernel.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu index ba96ea0599745..08f96116392b8 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu @@ -43,7 +43,7 @@ __host__ __device__ __forceinline__ double logOddsFusion(const double p1, const const double p1_norm = std::max(EPSILON_PROB, std::min(1.0 - EPSILON_PROB, p1)); log_odds += std::log(p1_norm / (1.0 - p1_norm)); - const double p2_norm = std::max(EPSILON_PROB, std::min(1.0 - EPSILON_PROB, p1)); + const double p2_norm = std::max(EPSILON_PROB, std::min(1.0 - EPSILON_PROB, p2)); log_odds += std::log(p2_norm / (1.0 - p2_norm)); return 1.0 / (1.0 + std::exp(-log_odds)); From a33d3d97cb37d5f2f57fc492fbe8102d90b6f30f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 23 Jan 2025 11:42:04 +0000 Subject: [PATCH 14/19] style(pre-commit): autofix --- .../lib/updater/log_odds_bayes_filter_updater_kernel.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu index 08f96116392b8..16705adac31b9 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater_kernel.cu @@ -33,7 +33,8 @@ __host__ __device__ __forceinline__ double convertCharToProbability(const std::u __host__ __device__ __forceinline__ std::uint8_t convertProbabilityToChar(const double value) { - return static_cast(std::max(0.0, std::min(1.0, value)) * 255.0);; + return static_cast(std::max(0.0, std::min(1.0, value)) * 255.0); + ; } __host__ __device__ __forceinline__ double logOddsFusion(const double p1, const double p2) From ea461308f5c992e769218cfa56da90e90b36f147 Mon Sep 17 00:00:00 2001 From: Kenzo Lobos-Tsunekawa Date: Thu, 23 Jan 2025 20:43:19 +0900 Subject: [PATCH 15/19] chore: added bound checkings in the update origin kernel Signed-off-by: Kenzo Lobos-Tsunekawa --- .../utils/utils_kernel.hpp | 7 ++++--- .../costmap_2d/occupancy_grid_map_base.cpp | 19 +++++++++++++---- .../lib/utils/utils_kernel.cu | 21 +++++++++++-------- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/utils_kernel.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/utils_kernel.hpp index 2ec06c23066a3..a335a4a6e5b93 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/utils_kernel.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/utils_kernel.hpp @@ -37,9 +37,10 @@ void __device__ raytrace( void copyMapRegionLaunch( const std::uint8_t * source_map, unsigned int sm_lower_left_x, unsigned int sm_lower_left_y, - unsigned int sm_size_x, std::uint8_t * dest_map, unsigned int dm_lower_left_x, - unsigned int dm_lower_left_y, unsigned int dm_size_x, unsigned int region_size_x, - unsigned int region_size_y, cudaStream_t stream); + unsigned int sm_size_x, unsigned int sm_size_y, std::uint8_t * dest_map, + unsigned int dm_lower_left_x, unsigned int dm_lower_left_y, unsigned int dm_size_x, + unsigned int dm_size_y, unsigned int region_size_x, unsigned int region_size_y, + cudaStream_t stream); void transformPointCloudLaunch( std::uint8_t * points, std::size_t num_points, std::size_t points_step, diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp index d0453fd6255ed..4325eb72d4c91 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp @@ -133,8 +133,8 @@ void OccupancyGridMapInterface::updateOrigin(double new_origin_x, double new_ori if (use_cuda_) { copyMapRegionLaunch( - device_costmap_.get(), lower_left_x, lower_left_y, size_x_, device_costmap_aux_.get(), 0, 0, - cell_size_x, cell_size_x, cell_size_y, stream_); + device_costmap_.get(), lower_left_x, lower_left_y, size_x_, size_y_, + device_costmap_aux_.get(), 0, 0, cell_size_x, cell_size_y, cell_size_x, cell_size_y, stream_); cudaMemset( device_costmap_.get(), cost_value::NO_INFORMATION, size_x_ * size_y_ * sizeof(std::uint8_t)); @@ -158,11 +158,22 @@ void OccupancyGridMapInterface::updateOrigin(double new_origin_x, double new_ori int start_x{lower_left_x - cell_ox}; int start_y{lower_left_y - cell_oy}; + if ( + start_x < 0 || start_y < 0 || start_x + cell_size_x > size_x_ || + start_y + cell_size_y > size_y_) { + RCLCPP_ERROR( + rclcpp::get_logger("pointcloud_based_occupancy_grid_map"), + "update coordinates are negative or out of bounds: start.x=%d, start.y=%d, cell_size.x=%d, " + "cell_size.y=%d size_x:%d, size_y=%d", + start_x, start_y, cell_size_x, cell_size_y, size_x_, size_y_); + return; + } + // now we want to copy the overlapping information back into the map, but in its new location if (use_cuda_) { copyMapRegionLaunch( - device_costmap_aux_.get(), 0, 0, cell_size_x, device_costmap_.get(), start_x, start_y, - size_x_, cell_size_x, cell_size_y, stream_); + device_costmap_aux_.get(), 0, 0, cell_size_x, cell_size_y, device_costmap_.get(), start_x, + start_y, size_x_, size_y_, cell_size_x, cell_size_y, stream_); } else { copyMapRegion( local_map, 0, 0, cell_size_x, costmap_, start_x, start_y, size_x_, cell_size_x, cell_size_y); diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils_kernel.cu b/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils_kernel.cu index 5f03e4360465e..696466fa89fbe 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils_kernel.cu +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/utils/utils_kernel.cu @@ -249,9 +249,9 @@ __global__ void transformPointCloudKernel( __global__ void copyMapRegionKernel( const std::uint8_t * source_map, unsigned int sm_lower_left_x, unsigned int sm_lower_left_y, - unsigned int sm_size_x, std::uint8_t * dest_map, unsigned int dm_lower_left_x, - unsigned int dm_lower_left_y, unsigned int dm_size_x, unsigned int region_size_x, - unsigned int region_size_y) + unsigned int sm_size_x, unsigned int sm_size_y, std::uint8_t * dest_map, + unsigned int dm_lower_left_x, unsigned int dm_lower_left_y, unsigned int dm_size_x, + unsigned int dm_size_y, unsigned int region_size_x, unsigned int region_size_y) { const int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx >= region_size_x * region_size_y) return; @@ -264,7 +264,9 @@ __global__ void copyMapRegionKernel( const int dm_index = dm_lower_left_y * dm_size_x + dm_lower_left_x + region_y * dm_size_x + region_x; - dest_map[dm_index] = source_map[sm_index]; + if (sm_index < sm_size_x * sm_size_y && dm_index < dm_size_x * dm_size_y) { + dest_map[dm_index] = source_map[sm_index]; + } } void transformPointCloudLaunch( @@ -280,17 +282,18 @@ void transformPointCloudLaunch( void copyMapRegionLaunch( const std::uint8_t * source_map, unsigned int sm_lower_left_x, unsigned int sm_lower_left_y, - unsigned int sm_size_x, std::uint8_t * dest_map, unsigned int dm_lower_left_x, - unsigned int dm_lower_left_y, unsigned int dm_size_x, unsigned int region_size_x, - unsigned int region_size_y, cudaStream_t stream) + unsigned int sm_size_x, unsigned int sm_size_y, std::uint8_t * dest_map, + unsigned int dm_lower_left_x, unsigned int dm_lower_left_y, unsigned int dm_size_x, + unsigned int dm_size_y, unsigned int region_size_x, unsigned int region_size_y, + cudaStream_t stream) { const int threads_per_block = 256; const int num_blocks = (region_size_x * region_size_y + threads_per_block - 1) / threads_per_block; copyMapRegionKernel<<>>( - source_map, sm_lower_left_x, sm_lower_left_y, sm_size_x, dest_map, dm_lower_left_x, - dm_lower_left_y, dm_size_x, region_size_x, region_size_y); + source_map, sm_lower_left_x, sm_lower_left_y, sm_size_x, sm_size_y, dest_map, dm_lower_left_x, + dm_lower_left_y, dm_size_x, dm_size_y, region_size_x, region_size_y); } } // namespace utils From 235effb9f6f9f547f1d65ae877f4874100e89381 Mon Sep 17 00:00:00 2001 From: Kenzo Lobos-Tsunekawa Date: Thu, 23 Jan 2025 20:48:20 +0900 Subject: [PATCH 16/19] chore: disabled tests since universe does not support cuda in ci/cd Signed-off-by: Kenzo Lobos-Tsunekawa --- .../CMakeLists.txt | 52 ++++++++++--------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt b/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt index 04415f898e453..e4acdb797bf3e 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt +++ b/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt @@ -113,29 +113,31 @@ ament_auto_package( ) # test -if(BUILD_TESTING) - # launch_testing - find_package(launch_testing_ament_cmake REQUIRED) - add_launch_test(test/test_pointcloud_based_method.py) - add_launch_test(test/test_synchronized_grid_map_fusion_node.py) - - # gtest - ament_add_gtest(test_utils - test/test_utils.cpp - ) - ament_add_gtest(costmap_unit_tests - test/cost_value_test.cpp - ) - ament_add_gtest(fusion_policy_unit_tests - test/fusion_policy_test.cpp - lib/fusion_policy/fusion_policy.cpp - ) - target_link_libraries(test_utils - ${CUDA_LIBRARIES} - ${PCL_LIBRARIES} - ${PROJECT_NAME}_common - ${PROJECT_NAME}_cuda - ) - target_include_directories(costmap_unit_tests PRIVATE "include") - target_include_directories(fusion_policy_unit_tests PRIVATE "include") +# Temporary disabled, tracked by: +# https://github.com/autowarefoundation/autoware.universe/issues/7724 +#if(BUILD_TESTING) +# # launch_testing +# find_package(launch_testing_ament_cmake REQUIRED) +# add_launch_test(test/test_pointcloud_based_method.py) +# add_launch_test(test/test_synchronized_grid_map_fusion_node.py) +# +# # gtest +# ament_add_gtest(test_utils +# test/test_utils.cpp +# ) +# ament_add_gtest(costmap_unit_tests +# test/cost_value_test.cpp +# ) +# ament_add_gtest(fusion_policy_unit_tests +# test/fusion_policy_test.cpp +# lib/fusion_policy/fusion_policy.cpp +# ) +# target_link_libraries(test_utils +# ${CUDA_LIBRARIES} +# ${PCL_LIBRARIES} +# ${PROJECT_NAME}_common +# ${PROJECT_NAME}_cuda +# ) +# target_include_directories(costmap_unit_tests PRIVATE "include") +# target_include_directories(fusion_policy_unit_tests PRIVATE "include") endif() From 1cecc33c0f5dc8feb94bfff862abb04df409f456 Mon Sep 17 00:00:00 2001 From: Kenzo Lobos-Tsunekawa Date: Thu, 23 Jan 2025 20:49:53 +0900 Subject: [PATCH 17/19] chore: added me as a maintainer Signed-off-by: Kenzo Lobos-Tsunekawa --- perception/autoware_probabilistic_occupancy_grid_map/package.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/package.xml b/perception/autoware_probabilistic_occupancy_grid_map/package.xml index 8e956b7570db4..06c3ed75f2758 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/package.xml +++ b/perception/autoware_probabilistic_occupancy_grid_map/package.xml @@ -7,6 +7,7 @@ Yukihiro Saito Yoshi Ri Mamoru Sobue + Kenzo Lobos-Tsunekawa Apache License 2.0 Yukihiro Saito From 3ad1665d04e9fe04b938bd46b95c453d8f78b97c Mon Sep 17 00:00:00 2001 From: Kenzo Lobos-Tsunekawa Date: Thu, 23 Jan 2025 21:01:19 +0900 Subject: [PATCH 18/19] fix: missedn the end in the cmake Signed-off-by: Kenzo Lobos-Tsunekawa --- .../autoware_probabilistic_occupancy_grid_map/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt b/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt index e4acdb797bf3e..7dda1604f10f3 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt +++ b/perception/autoware_probabilistic_occupancy_grid_map/CMakeLists.txt @@ -140,4 +140,4 @@ ament_auto_package( # ) # target_include_directories(costmap_unit_tests PRIVATE "include") # target_include_directories(fusion_policy_unit_tests PRIVATE "include") -endif() +#endif() From f975819b710accf1879f793ed9029d3e47b5ffc0 Mon Sep 17 00:00:00 2001 From: Kenzo Lobos-Tsunekawa Date: Fri, 24 Jan 2025 11:51:17 +0900 Subject: [PATCH 19/19] chore: moved the boudnary checks to only the cuda version since the cpu version uses the upstream logic Signed-off-by: Kenzo Lobos-Tsunekawa --- .../costmap_2d/occupancy_grid_map_base.cpp | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp index 4325eb72d4c91..f4d290bd70d7c 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp @@ -158,19 +158,19 @@ void OccupancyGridMapInterface::updateOrigin(double new_origin_x, double new_ori int start_x{lower_left_x - cell_ox}; int start_y{lower_left_y - cell_oy}; - if ( - start_x < 0 || start_y < 0 || start_x + cell_size_x > size_x_ || - start_y + cell_size_y > size_y_) { - RCLCPP_ERROR( - rclcpp::get_logger("pointcloud_based_occupancy_grid_map"), - "update coordinates are negative or out of bounds: start.x=%d, start.y=%d, cell_size.x=%d, " - "cell_size.y=%d size_x:%d, size_y=%d", - start_x, start_y, cell_size_x, cell_size_y, size_x_, size_y_); - return; - } - // now we want to copy the overlapping information back into the map, but in its new location if (use_cuda_) { + if ( + start_x < 0 || start_y < 0 || start_x + cell_size_x > size_x_ || + start_y + cell_size_y > size_y_) { + RCLCPP_ERROR( + rclcpp::get_logger("pointcloud_based_occupancy_grid_map"), + "update coordinates are negative or out of bounds: start.x=%d, start.y=%d, cell_size.x=%d, " + "cell_size.y=%d size_x:%d, size_y=%d", + start_x, start_y, cell_size_x, cell_size_y, size_x_, size_y_); + return; + } + copyMapRegionLaunch( device_costmap_aux_.get(), 0, 0, cell_size_x, cell_size_y, device_costmap_.get(), start_x, start_y, size_x_, size_y_, cell_size_x, cell_size_y, stream_);