working on multisectorcalib

themachinethatgoesping · Oct 9, 2024 · bf26e47 · bf26e47
1 parent ecf3932
commit bf26e47
Show file tree

Hide file tree

Showing 3 changed files with 180 additions and 110 deletions.
diff --git a/src/pymodule/amplitudecorrection/functions/module.hpp b/src/pymodule/amplitudecorrection/functions/module.hpp
@@ -17,132 +17,136 @@ namespace py_functions {
 #define DOC_functions(ARG)                                                                         \
     DOC(themachinethatgoesping, algorithms, amplitudecorrection, functions, ARG)
 
-void init_m_functions(pybind11::module& m)
+template<typename t_float>
+void init_functions(pybind11::module& m)
 {
     namespace py = pybind11;
-
-    pybind11::module submodule = m.def_submodule("functions");
-
-    submodule.doc() = "Submodule that holds functions used for amplitude corrections";
-
     using namespace amplitudecorrection::functions;
 
     // sound velocity
-    submodule.def("calc_sound_velocity",
-                  &calc_sound_velocity,
-                  DOC_functions(calc_sound_velocity),
-                  py::arg("depth_m"),
-                  py::arg("temperature_c"),
-                  py::arg("salinity_psu"),
-                  py::arg("latitude")  = 0.0,
-                  py::arg("longitude") = 0.0);
+    m.def("calc_sound_velocity",
+          &calc_sound_velocity,
+          DOC_functions(calc_sound_velocity),
+          py::arg("depth_m"),
+          py::arg("temperature_c"),
+          py::arg("salinity_psu"),
+          py::arg("latitude")  = 0.0,
+          py::arg("longitude") = 0.0);
 
     // absorption
-    submodule.def("calc_absorption_coefficient_db_m",
-                  &calc_absorption_coefficient_db_m,
-                  DOC_functions(calc_absorption_coefficient_db_m),
-                  py::arg("frequency_hz"),
-                  py::arg("depth_m"),
-                  py::arg("sound_velocity_m_s"),
-                  py::arg("temperature_c"),
-                  py::arg("salinity_psu"),
-                  py::arg("pH") = 8);
+    m.def("calc_absorption_coefficient_db_m",
+          &calc_absorption_coefficient_db_m,
+          DOC_functions(calc_absorption_coefficient_db_m),
+          py::arg("frequency_hz"),
+          py::arg("depth_m"),
+          py::arg("sound_velocity_m_s"),
+          py::arg("temperature_c"),
+          py::arg("salinity_psu"),
+          py::arg("pH") = 8);
 
     // range correction
-    submodule.def("get_sample_numbers_plus_half",
-                  &get_sample_numbers_plus_half<xt::pytensor<float, 1>, int32_t>,
-                  DOC_functions(get_sample_numbers_plus_half),
-                  py::arg("first_sample_nr"),
-                  py::arg("last_sample_nr"),
-                  py::arg("step") = 1);
-
-    submodule.def("approximate_range_factor",
-                  &approximate_range_factor<float>,
-                  DOC_functions(approximate_range_factor),
-                  py::arg("sample_interval_s"),
-                  py::arg("sound_velocity_m_s"));
-
-    submodule.def("approximate_ranges",
-                  py::overload_cast<float, float, int32_t, int32_t, int32_t>(
-                      &approximate_ranges<xt::pytensor<float, 1>, int32_t>),
-                  DOC_functions(approximate_ranges),
-                  py::arg("sample_interval_s"),
-                  py::arg("sound_velocity_m_s"),
-                  py::arg("first_sample_nr"),
-                  py::arg("last_sample_nr"),
-                  py::arg("step") = 1);
-
-    submodule.def("approximate_ranges",
-                  py::overload_cast<float, float, const xt::pytensor<int32_t, 1>&>(
-                      &approximate_ranges<xt::pytensor<float, 1>, xt::pytensor<int32_t, 1>>),
-                  DOC_functions(approximate_ranges_2),
-                  py::arg("sample_interval_s"),
-                  py::arg("sound_velocity_m_s"),
-                  py::arg("sample_numbers"));
-
-    submodule.def("compute_cw_range_correction",
-                  &compute_cw_range_correction<xt::pytensor<float, 1>>,
-                  DOC_functions(compute_cw_range_correction),
-                  py::arg("ranges_m"),
-                  py::arg("absorption_db_m"),
-                  py::arg("tvg_factor"));
-
-    submodule.def("apply_beam_sample_correction",
-                  &apply_beam_sample_correction<xt::pytensor<float, 2>, xt::pytensor<float, 1>>,
-                  DOC_functions(apply_beam_sample_correction),
-                  py::arg("wci"),
-                  py::arg("per_beam_offset"),
-                  py::arg("per_sample_offset"),
-                  py::arg("mp_cores") = 1);
-
-    submodule.def("apply_beam_correction",
-                  &apply_beam_correction<xt::pytensor<float, 2>, xt::pytensor<float, 1>>,
-                  DOC_functions(apply_beam_correction),
-                  py::arg("wci"),
-                  py::arg("per_beam_offset"),
-                  py::arg("mp_cores") = 1);
-
-    submodule.def("apply_sample_correction",
-                  &apply_sample_correction<xt::pytensor<float, 2>, xt::pytensor<float, 1>>,
-                  DOC_functions(apply_sample_correction),
-                  py::arg("wci"),
-                  py::arg("per_sample_offset"),
-                  py::arg("mp_cores") = 1);
-
-    submodule.def(
-        "apply_beam_sample_correction_loop",
-        &apply_beam_sample_correction_loop<xt::pytensor<float, 2>, xt::pytensor<float, 1>>,
-        DOC_functions(apply_beam_sample_correction_loop),
-        py::arg("wci"),
-        py::arg("per_beam_offset"),
-        py::arg("per_sample_offset"),
-        py::arg("mp_cores") = 1);
-
-    submodule.def(
+    m.def("get_sample_numbers_plus_half",
+          &get_sample_numbers_plus_half<xt::pytensor<t_float, 1>, int32_t>,
+          DOC_functions(get_sample_numbers_plus_half),
+          py::arg("first_sample_nr"),
+          py::arg("last_sample_nr"),
+          py::arg("step") = 1);
+
+    m.def("approximate_range_factor",
+          &approximate_range_factor<t_float>,
+          DOC_functions(approximate_range_factor),
+          py::arg("sample_interval_s"),
+          py::arg("sound_velocity_m_s"));
+
+    m.def("approximate_ranges",
+          py::overload_cast<t_float, t_float, int32_t, int32_t, int32_t>(
+              &approximate_ranges<xt::pytensor<t_float, 1>, int32_t>),
+          DOC_functions(approximate_ranges),
+          py::arg("sample_interval_s"),
+          py::arg("sound_velocity_m_s"),
+          py::arg("first_sample_nr"),
+          py::arg("last_sample_nr"),
+          py::arg("step") = 1);
+
+    m.def("approximate_ranges",
+          py::overload_cast<t_float, t_float, const xt::pytensor<int32_t, 1>&>(
+              &approximate_ranges<xt::pytensor<t_float, 1>, xt::pytensor<int32_t, 1>>),
+          DOC_functions(approximate_ranges_2),
+          py::arg("sample_interval_s"),
+          py::arg("sound_velocity_m_s"),
+          py::arg("sample_numbers"));
+
+    m.def("compute_cw_range_correction",
+          &compute_cw_range_correction<xt::pytensor<t_float, 1>>,
+          DOC_functions(compute_cw_range_correction),
+          py::arg("ranges_m"),
+          py::arg("absorption_db_m"),
+          py::arg("tvg_factor"));
+
+    m.def("apply_beam_sample_correction",
+          &apply_beam_sample_correction<xt::pytensor<t_float, 2>, xt::pytensor<t_float, 1>>,
+          DOC_functions(apply_beam_sample_correction),
+          py::arg("wci"),
+          py::arg("per_beam_offset"),
+          py::arg("per_sample_offset"),
+          py::arg("mp_cores") = 1);
+
+    m.def("apply_beam_correction",
+          &apply_beam_correction<xt::pytensor<t_float, 2>, xt::pytensor<t_float, 1>>,
+          DOC_functions(apply_beam_correction),
+          py::arg("wci"),
+          py::arg("per_beam_offset"),
+          py::arg("mp_cores") = 1);
+
+    m.def("apply_sample_correction",
+          &apply_sample_correction<xt::pytensor<t_float, 2>, xt::pytensor<t_float, 1>>,
+          DOC_functions(apply_sample_correction),
+          py::arg("wci"),
+          py::arg("per_sample_offset"),
+          py::arg("mp_cores") = 1);
+
+    m.def("apply_beam_sample_correction_loop",
+          &apply_beam_sample_correction_loop<xt::pytensor<t_float, 2>, xt::pytensor<t_float, 1>>,
+          DOC_functions(apply_beam_sample_correction_loop),
+          py::arg("wci"),
+          py::arg("per_beam_offset"),
+          py::arg("per_sample_offset"),
+          py::arg("mp_cores") = 1);
+
+    m.def(
         "apply_beam_sample_correction_xtensor2",
-        &apply_beam_sample_correction_xtensor2<xt::pytensor<float, 2>, xt::pytensor<float, 1>>,
+        &apply_beam_sample_correction_xtensor2<xt::pytensor<t_float, 2>, xt::pytensor<t_float, 1>>,
         DOC_functions(apply_beam_sample_correction_xtensor2),
         py::arg("wci"),
         py::arg("per_beam_offset"),
         py::arg("per_sample_offset"),
         py::arg("mp_cores") = 1);
-    submodule.def(
+    m.def(
         "apply_beam_sample_correction_xtensor3",
-        &apply_beam_sample_correction_xtensor3<xt::pytensor<float, 2>, xt::pytensor<float, 1>>,
+        &apply_beam_sample_correction_xtensor3<xt::pytensor<t_float, 2>, xt::pytensor<t_float, 1>>,
         DOC_functions(apply_beam_sample_correction_xtensor3),
         py::arg("wci"),
         py::arg("per_beam_offset"),
         py::arg("per_sample_offset"),
         py::arg("mp_cores") = 1);
 
-    submodule.def(
-        "apply_beam_sample_correction_xsimd",
-        &apply_beam_sample_correction_xsimd<xt::pytensor<float, 2>, xt::pytensor<float, 1>>,
-        DOC_functions(apply_beam_sample_correction_xsimd),
-        py::arg("wci"),
-        py::arg("per_beam_offset"),
-        py::arg("per_sample_offset"),
-        py::arg("mp_cores") = 1);
+    m.def("apply_beam_sample_correction_xsimd",
+          &apply_beam_sample_correction_xsimd<xt::pytensor<t_float, 2>, xt::pytensor<t_float, 1>>,
+          DOC_functions(apply_beam_sample_correction_xsimd),
+          py::arg("wci"),
+          py::arg("per_beam_offset"),
+          py::arg("per_sample_offset"),
+          py::arg("mp_cores") = 1);
+}
+
+void init_m_functions(pybind11::module& m)
+{
+    pybind11::module submodule = m.def_submodule("functions");
+
+    submodule.doc() = "M that holds functions used for amplitude corrections";
+
+    init_functions<float>(submodule);
+    init_functions<double>(submodule);
 }
 
 } // namespace py_functions

diff --git a/...nethatgoesping/algorithms/amplitudecorrection/functions/.docstrings/wcicorrection.doc.hpp b/...nethatgoesping/algorithms/amplitudecorrection/functions/.docstrings/wcicorrection.doc.hpp
@@ -1,4 +1,4 @@
-//sourcehash: a302a9f48e1b20efdf9ab01a6bae5c8fea7eea95c0e2aafc2ed22d54e3dbd97a
+//sourcehash: 54bf0668598cbd418b0b6ab4ab8932dc0359c8ffad2744f7d4c9db5ec3c64f26
 
 /*
   This file contains docstrings for use in the Python bindings.
@@ -56,6 +56,19 @@ static const char *__doc_themachinethatgoesping_algorithms_amplitudecorrection_f
 
 static const char *__doc_themachinethatgoesping_algorithms_amplitudecorrection_functions_assert_wci_beam_sample_shape = R"doc()doc";
 
+static const char *__doc_themachinethatgoesping_algorithms_amplitudecorrection_functions_inplace_beam_correction = R"doc()doc";
+
+static const char *__doc_themachinethatgoesping_algorithms_amplitudecorrection_functions_inplace_beam_sample_correction = R"doc()doc";
+
+static const char *__doc_themachinethatgoesping_algorithms_amplitudecorrection_functions_inplace_sample_correction =
+R"doc(Template parameter ``t_xtensor_2d``:
+    $Template parameter ``t_xtensor_1d``:
+
+Parameter ``wci``:
+    $Parameter ``per_sample_offset``:
+
+Parameter ``mp_cores``:)doc";
+
 #if defined(__GNUG__)
 #pragma GCC diagnostic pop
 #endif

diff --git a/src/themachinethatgoesping/algorithms/amplitudecorrection/functions/wcicorrection.hpp b/src/themachinethatgoesping/algorithms/amplitudecorrection/functions/wcicorrection.hpp
@@ -80,6 +80,21 @@ inline t_xtensor_2d apply_beam_sample_correction(const t_xtensor_2d& wci,
     return result;
 }
 
+template<tools::helper::c_xtensor t_xtensor_2d, tools::helper::c_xtensor t_xtensor_1d>
+inline void inplace_beam_sample_correction([[maybe_unused]] t_xtensor_2d& wci,
+                                           const t_xtensor_1d&            per_beam_offset,
+                                           const t_xtensor_1d&            per_sample_offset,
+                                           int                            mp_cores = 1)
+{
+    assert_wci_beam_sample_shape(wci, per_beam_offset, per_sample_offset);
+
+    // Apply the range correction to each sample
+#pragma omp parallel for num_threads(mp_cores)
+    for (unsigned int bi = 0; bi < per_beam_offset.size(); ++bi)
+        xt::row(wci, bi) =
+            xt::eval(xt::row(wci, bi) + (per_beam_offset.unchecked(bi) + per_sample_offset));
+}
+
 template<tools::helper::c_xtensor t_xtensor_2d, tools::helper::c_xtensor t_xtensor_1d>
 inline t_xtensor_2d apply_beam_correction(const t_xtensor_2d& wci,
                                           const t_xtensor_1d& per_beam_offset,
@@ -96,6 +111,18 @@ inline t_xtensor_2d apply_beam_correction(const t_xtensor_2d& wci,
     return result;
 }
 
+template<tools::helper::c_xtensor t_xtensor_2d, tools::helper::c_xtensor t_xtensor_1d>
+inline t_xtensor_2d inplace_beam_correction([[maybe_unused]] t_xtensor_2d& wci,
+                                            const t_xtensor_1d&            per_beam_offset,
+                                            int                            mp_cores = 1)
+{
+    assert_wci_axis_shape(wci, per_beam_offset, 0, "per_beam_offset");
+
+#pragma omp parallel for num_threads(mp_cores)
+    for (unsigned int bi = 0; bi < per_beam_offset.size(); ++bi)
+        xt::row(wci, bi) = xt::eval(xt::row(wci, bi) + per_beam_offset.unchecked(bi));
+}
+
 template<tools::helper::c_xtensor t_xtensor_2d, tools::helper::c_xtensor t_xtensor_1d>
 inline t_xtensor_2d apply_sample_correction(const t_xtensor_2d&  wci,
                                             const t_xtensor_1d&  per_sample_offset,
@@ -106,6 +133,27 @@ inline t_xtensor_2d apply_sample_correction(const t_xtensor_2d&  wci,
     return wci + xt::view(per_sample_offset, xt::newaxis(), xt::all());
 }
 
+/**
+ * @brief
+ *
+ * @tparam t_xtensor_2d
+ * @tparam t_xtensor_1d
+ * @param wci
+ * @param per_sample_offset
+ * @param mp_cores
+ */
+template<tools::helper::c_xtensor t_xtensor_2d, tools::helper::c_xtensor t_xtensor_1d>
+void inplace_sample_correction(
+    [[maybe_unused]] t_xtensor_2d& wci, //[[maybe_unused]] is used here because pybind11-mkdoc fails
+                                        // of no specifier is used for this template type
+    const t_xtensor_1d&  per_sample_offset,
+    [[maybe_unused]] int mp_cores = 1)
+{
+    assert_wci_axis_shape(wci, per_sample_offset, 1, "per_sample_offset");
+
+    wci = xt::eval(wci + xt::view(per_sample_offset, xt::newaxis(), xt::all()));
+}
+
 //--- these functions are for benchmarking purposes ---
 
 template<tools::helper::c_xtensor t_xtensor_2d, tools::helper::c_xtensor t_xtensor_1d>
@@ -122,7 +170,6 @@ inline t_xtensor_2d apply_beam_sample_correction_loop(const t_xtensor_2d& wci,
     for (unsigned int bi = 0; bi < per_beam_offset.size(); ++bi)
     {
         auto beam_offset = per_beam_offset.unchecked(bi);
-#pragma GCC ivdep
         for (unsigned int si = 0; si < per_sample_offset.size(); ++si)
             result.unchecked(bi, si) =
                 wci.unchecked(bi, si) + beam_offset + per_sample_offset.unchecked(si);
@@ -168,14 +215,20 @@ inline t_xtensor_2d apply_beam_sample_correction_xsimd(const t_xtensor_2d& wci,
                                                        const t_xtensor_1d& per_sample_offset,
                                                        int                 mp_cores = 1)
 {
+    using t_float  = tools::helper::xtensor_datatype<t_xtensor_2d>::type;
+    using t_float1 = tools::helper::xtensor_datatype<t_xtensor_1d>::type;
+
+    static_assert(std::is_same<t_float, t_float1>::value,
+                  "float type for all tensors must be the same type");
+
     assert_wci_beam_sample_shape(wci, per_beam_offset, per_sample_offset);
 
     // Get the size of the vectors
     int64_t nbeams   = per_beam_offset.size();
     int64_t nsamples = per_sample_offset.size();
 
     // Determine the SIMD batch size
-    int64_t simd_size          = xsimd::batch<float>::size;
+    int64_t simd_size          = xsimd::batch<t_float>::size;
     int64_t max_simd_sample_nr = nsamples - simd_size;
 
     t_xtensor_2d result = xt::empty_like(wci);
@@ -185,18 +238,18 @@ inline t_xtensor_2d apply_beam_sample_correction_xsimd(const t_xtensor_2d& wci,
     for (int64_t bi = 0; bi < nbeams; ++bi)
     {
         // Load the beam offset for the current row
-        float beam_offset = per_beam_offset.unchecked(bi);
+        t_float beam_offset = per_beam_offset.unchecked(bi);
 
         // Process the inner loop in chunks of SIMD batch size
         int64_t si = 0;
         for (; si < max_simd_sample_nr; si += simd_size)
         {
             // Load the sample offsets into a SIMD batch
-            xsimd::batch<float> sample_offset =
+            xsimd::batch<t_float> sample_offset =
                 xsimd::load_aligned(&(per_sample_offset.unchecked(si)));
 
             // Load the current values from the wci matrix
-            xsimd::batch<float> wci_values = xsimd::load_aligned(&wci.unchecked(bi, si));
+            xsimd::batch<t_float> wci_values = xsimd::load_aligned(&wci.unchecked(bi, si));
 
             // Perform the SIMD addition
             wci_values += beam_offset + sample_offset;