From f8284e9525bedeacc35b05e3828c6f7077be889f Mon Sep 17 00:00:00 2001 From: Jeremy Kubica <104161096+jeremykubica@users.noreply.github.com> Date: Fri, 8 Dec 2023 11:02:42 -0500 Subject: [PATCH 1/7] Create a PsiPhiArray data structure --- src/kbmod/search/bindings.cpp | 2 + src/kbmod/search/kernels.cu | 31 +++- src/kbmod/search/psi_phi_array.cpp | 220 +++++++++++++++++++++++++ src/kbmod/search/psi_phi_array_ds.h | 148 +++++++++++++++++ src/kbmod/search/psi_phi_array_utils.h | 34 ++++ tests/test_psi_phi_array.py | 143 ++++++++++++++++ 6 files changed, 576 insertions(+), 2 deletions(-) create mode 100644 src/kbmod/search/psi_phi_array.cpp create mode 100644 src/kbmod/search/psi_phi_array_ds.h create mode 100644 src/kbmod/search/psi_phi_array_utils.h create mode 100644 tests/test_psi_phi_array.py diff --git a/src/kbmod/search/bindings.cpp b/src/kbmod/search/bindings.cpp index b7d8894b..fae426e9 100644 --- a/src/kbmod/search/bindings.cpp +++ b/src/kbmod/search/bindings.cpp @@ -16,6 +16,7 @@ namespace py = pybind11; #include "stack_search.cpp" #include "stamp_creator.cpp" #include "filtering.cpp" +#include "psi_phi_array.cpp" PYBIND11_MODULE(search, m) { m.attr("KB_NO_DATA") = pybind11::float_(search::NO_DATA); @@ -39,6 +40,7 @@ PYBIND11_MODULE(search, m) { search::pixel_pos_bindings(m); search::image_moments_bindings(m); search::stamp_parameters_bindings(m); + search::psi_phi_array_binding(m); // Functions from raw_image.cpp m.def("create_median_image", &search::create_median_image); m.def("create_summed_image", &search::create_summed_image); diff --git a/src/kbmod/search/kernels.cu b/src/kbmod/search/kernels.cu index ed9fd97a..e26549e7 100644 --- a/src/kbmod/search/kernels.cu +++ b/src/kbmod/search/kernels.cu @@ -19,11 +19,36 @@ #include "common.h" #include "cuda_errors.h" - -//#include "image_stack.h" +#include "psi_phi_array_ds.h" namespace search { +extern "C" void device_allocate_psi_phi_array(PsiPhiArray& data) { + assertm(data.get_cpu_array_ptr() != nullptr, "CPU array not allocated."); + assertm(data.get_gpu_array_ptr() == nullptr, "GPU array already allocated."); + + void* array_ptr; + checkCudaErrors(cudaMalloc((void **)&array_ptr, data.get_total_array_size())); + checkCudaErrors(cudaMemcpy(array_ptr, + data.get_cpu_array_ptr(), + data.get_total_array_size(), + cudaMemcpyHostToDevice)); + data.set_gpu_array_ptr(array_ptr); +} + +extern "C" void device_free_psi_phi_array(PsiPhiArray& data) { + if (data.get_gpu_array_ptr() != nullptr) { + checkCudaErrors(cudaFree(data.get_gpu_array_ptr())); + data.set_gpu_array_ptr(nullptr); + } +} + +// Read the pixel from the compressed array. +inline __device__ PsiPhi device_read_encoded_psi_phi(PsiPhiArray& data, int time, int row, int col) { + return data.read_encoded_psi_phi(time, row, col, true); +} + + extern "C" __device__ __host__ void SigmaGFilteredIndicesCU(float *values, int num_values, float sgl0, float sgl1, float sigmag_coeff, float width, int *idx_array, int *min_keep_idx, @@ -285,6 +310,8 @@ void *encodeImageFloat(float *image_vect, unsigned int vectLength, bool debug) { return device_vect; } + + extern "C" void deviceSearchFilter(int num_images, int width, int height, float *psi_vect, float *phi_vect, PerImageData img_data, SearchParameters params, int num_trajectories, Trajectory *trj_to_search, int num_results, Trajectory *best_results) { diff --git a/src/kbmod/search/psi_phi_array.cpp b/src/kbmod/search/psi_phi_array.cpp new file mode 100644 index 00000000..a281b81f --- /dev/null +++ b/src/kbmod/search/psi_phi_array.cpp @@ -0,0 +1,220 @@ +#include "psi_phi_array_ds.h" +#include "psi_phi_array_utils.h" + +namespace search { + +// Declaration of CUDA functions that will be linked in. +#ifdef HAVE_CUDA +extern "C" void device_allocate_psi_phi_array(PsiPhiArray& data); + +extern "C" void device_free_psi_phi_array(PsiPhiArray& data); +#endif + +// ------------------------------------------------------- +// --- Implementation of core data structure functions --- +// ------------------------------------------------------- + +PsiPhiArray::PsiPhiArray() : num_bytes(-1) { + block_size = sizeof(float); +} + +PsiPhiArray::PsiPhiArray(int encode_bytes) : num_bytes(encode_bytes) { + if (num_bytes == 1) { + block_size = sizeof(uint8_t); + } else if (num_bytes == 2) { + block_size = sizeof(uint16_t); + } else { + num_bytes = -1; + block_size = sizeof(float); + } +} + +PsiPhiArray::~PsiPhiArray() { + clear(); +} + +void PsiPhiArray::clear() { + // Free all used memory. + if (cpu_array_ptr != nullptr) { + free(cpu_array_ptr); + cpu_array_ptr = nullptr; + } + + // Reset the meta data except the encoding information. + num_times = 0; + width = 0; + height = 0; + pixels_per_image = 0; + num_entries = 0; + total_array_size = 0; + + psi_min_val = FLT_MAX; + psi_max_val = -FLT_MAX; + psi_scale = 1.0; + + phi_min_val = FLT_MAX; + phi_max_val = -FLT_MAX; + phi_scale = 1.0; +} + +void PsiPhiArray::set_meta_data(int new_num_times, int new_width, int new_height) { + assertm(num_times > 0 && width > 0 && height > 0, + "Invalid metadata provided to PsiPhiArray"); + assertm(cpu_array_ptr == nullptr, "Cannot change meta data with allocated arrays. Call clear() first."); + + num_times = new_num_times; + width = new_width; + height = new_height; + pixels_per_image = width * height; + num_entries = 2 * pixels_per_image * num_times; + total_array_size = block_size * num_entries; +} + +void PsiPhiArray::set_psi_scaling(float min_val, float max_val, float scale_val) { + assertm(min_val < max_val, "Min value needs to be < max value"); + assertm(scale_val > 0.0, "Scale value must be greater than zero."); + psi_min_val = min_val; + psi_max_val = max_val; + psi_scale = scale_val; +} + +void PsiPhiArray::set_phi_scaling(float min_val, float max_val, float scale_val) { + assertm(min_val < max_val, "Min value needs to be < max value"); + assertm(scale_val > 0.0, "Scale value must be greater than zero."); + phi_min_val = min_val; + phi_max_val = max_val; + phi_scale = scale_val; +} + +// ------------------------------------------- +// --- Implementation of utility functions --- +// ------------------------------------------- + +// Compute the min, max, and scale parameter from the a vector of image data. +std::array compute_scale_params_from_image_vect(const std::vector& imgs, int num_bytes) { + int num_images = imgs.size(); + + // Do a linear pass through the data to compute the scaling parameters for psi and phi. + float min_val = FLT_MAX; + float max_val = -FLT_MAX; + for (int i = 0; i < num_images; ++i) { + std::array bnds = imgs[i].compute_bounds(); + if (bnds[0] < min_val) min_val = bnds[0]; + if (bnds[1] > max_val) max_val = bnds[1]; + } + + // Increase max_value slightly to avoid the max value rolling over during encoding. + max_val += 1e-6; + + // Set the scale if we are encoding the values. + float scale = 1.0; + if (num_bytes == 1 || num_bytes == 2) { + float width = (max_val - min_val); + long int num_values = (1 << (8 * num_bytes)) - 1; + scale = width / (double)num_values; + } + + return {min_val, max_val, scale}; +} + +template +void set_encode_cpu_psi_phi_array(PsiPhiArray& data, const std::vector& psi_imgs, + const std::vector& phi_imgs) { + assertm(data.get_cpu_array_ptr() != nullptr, "CPU PsiPhi already allocated."); + T *encoded = (T *)malloc(data.get_total_array_size()); + + int current_index = 0; + int num_bytes = data.get_num_bytes(); + for (int t = 0; t < data.get_num_times(); ++t) { + for (int row = 0; row < data.get_height(); ++row) { + for (int col = 0; col < data.get_width(); ++col) { + float psi_value = psi_imgs[t].get_pixel({row, col}); + float phi_value = phi_imgs[t].get_pixel({row, col}); + + // Handle the encoding for the different values. + if (num_bytes == 1 || num_bytes == 2) { + psi_value = encode_uint_scalar(psi_value, data.get_psi_min_val(), data.get_psi_max_val(), + data.get_psi_scale()); + phi_value = encode_uint_scalar(phi_value, data.get_phi_min_val(), data.get_phi_max_val(), + data.get_phi_scale()); + } + + encoded[current_index++] = static_cast(psi_value); + encoded[current_index++] = static_cast(phi_value); + assertm(current_index <= data.get_num_entries(), "Out of bounds write."); + } + } + } + + data.set_cpu_array_ptr((void*)encoded); +} + +void fill_psi_phi_array(PsiPhiArray& result_data, + const std::vector& psi_imgs, + const std::vector& phi_imgs) { + if (result_data.get_cpu_array_ptr() != nullptr) { + return; + } + + // Set the meta data and do a bunch of validity checks. + int num_times = psi_imgs.size(); + assertm(num_times > 0, "Trying to fill PsiPhi from empty vectors."); + assertm(num_times = phi_imgs.size(), "Size mismatch between psi and phi."); + + int width = phi_imgs[0].get_width(); + int height = phi_imgs[0].get_height(); + result_data.set_meta_data(num_times, width, height); + + // Compute the scaling parameters. + std::array psi_params = compute_scale_params_from_image_vect(psi_imgs, result_data.get_num_bytes()); + result_data.set_psi_scaling(psi_params[0], psi_params[1], psi_params[2]); + + std::array phi_params = compute_scale_params_from_image_vect(phi_imgs, result_data.get_num_bytes()); + result_data.set_phi_scaling(phi_params[0], phi_params[1], phi_params[2]); + + // Compute the memory footprint for the arrays and do the local encoding. + + if (result_data.get_num_bytes() == 1) { + set_encode_cpu_psi_phi_array(result_data, psi_imgs, phi_imgs); + } else if (result_data.get_num_bytes() == 2) { + set_encode_cpu_psi_phi_array(result_data, psi_imgs, phi_imgs); + } else { + set_encode_cpu_psi_phi_array(result_data, psi_imgs, phi_imgs); + } +} + +// ------------------------------------------- +// --- Python definitions -------------------- +// ------------------------------------------- + +#ifdef Py_PYTHON_H +static void psi_phi_array_binding(py::module& m) { + using ppa = search::PsiPhiArray; + + py::class_(m, "PsiPhiArray") + .def(py::init<>()) + .def(py::init()) + .def_property_readonly("num_bytes", &ppa::get_num_bytes) + .def_property_readonly("num_times", &ppa::get_num_times) + .def_property_readonly("width", &ppa::get_width) + .def_property_readonly("height", &ppa::get_height) + .def_property_readonly("pixels_per_image", &ppa::get_pixels_per_image) + .def_property_readonly("num_entries", &ppa::get_num_entries) + .def_property_readonly("total_array_size", &ppa::get_total_array_size) + .def_property_readonly("block_size", &ppa::get_block_size) + .def_property_readonly("psi_min_val", &ppa::get_psi_min_val) + .def_property_readonly("psi_max_val", &ppa::get_psi_max_val) + .def_property_readonly("psi_scale", &ppa::get_psi_scale) + .def_property_readonly("phi_min_val", &ppa::get_phi_min_val) + .def_property_readonly("phi_max_val", &ppa::get_phi_max_val) + .def_property_readonly("phi_scale", &ppa::get_phi_scale) + .def("set_meta_data", &ppa::set_meta_data) + .def("clear", &ppa::clear) + .def("read_encoded_psi_phi", &ppa::read_encoded_psi_phi); + m.def("compute_scale_params_from_image_vect", &search::compute_scale_params_from_image_vect); + m.def("decode_uint_scalar", &search::decode_uint_scalar); + m.def("encode_uint_scalar", &search::encode_uint_scalar); +} +#endif + +} /* namespace search */ diff --git a/src/kbmod/search/psi_phi_array_ds.h b/src/kbmod/search/psi_phi_array_ds.h new file mode 100644 index 00000000..2355cbd9 --- /dev/null +++ b/src/kbmod/search/psi_phi_array_ds.h @@ -0,0 +1,148 @@ +/* + * psi_phi_array_ds.h + * + * The data structure for the interleaved psi/phi array. The the data + * structure and core functions are included in the header (and separated out + * from the rest of the utility functions) to allow the CUDA files to import + * only what they need. + * + * Created on: Dec 5, 2023 + */ + +#ifndef PSI_PHI_ARRAY_DS_ +#define PSI_PHI_ARRAY_DS_ + +#include +#include +#include +#include + +#include "common.h" + +namespace search { + +/* PsiPhi is a simple struct to hold a named pair of psi and phi values. */ +struct PsiPhi { + float psi = 0.0; + float phi = 0.0; +}; + +// Helper utility functions. +inline float encode_uint_scalar(float value, float min_val, float max_val, float scale) { + return (value == NO_DATA) ? 0 : (std::max(std::min(value, max_val), min_val) - min_val) / scale + 1.0; +} + +inline float decode_uint_scalar(float value, float min_val, float scale) { + return (value == 0.0) ? NO_DATA : (value - 1.0) * scale + min_val; +} + +/* PsiPhiArray is a class to hold the psi and phi arrays for the GPU + as well as functions to do encoding and decoding. + All functions used by CUDA are inlined and defined in the header. +*/ +class PsiPhiArray { +public: + explicit PsiPhiArray(); + explicit PsiPhiArray(int encode_bytes); + virtual ~PsiPhiArray(); + + void clear(); + + // --- Getter functions (for Python interface) ---------------- + inline int get_num_bytes() { return num_bytes; } + inline int get_num_times() { return num_times; } + inline int get_width() { return width; } + inline int get_height() { return height; } + inline int get_pixels_per_image() { return pixels_per_image; } + inline int get_num_entries() { return num_entries; } + inline int get_total_array_size() { return total_array_size; } + inline int get_block_size() { return block_size; } + + inline float get_psi_min_val() { return psi_min_val; } + inline float get_psi_max_val() { return psi_max_val; } + inline float get_psi_scale() { return psi_scale; } + inline float get_phi_min_val() { return phi_min_val; } + inline float get_phi_max_val() { return phi_max_val; } + inline float get_phi_scale() { return phi_scale; } + + // Primary getter function for interaction (read the data). + inline PsiPhi read_encoded_psi_phi(int time, int row, int col, bool from_gpu); + + // Setters for the utility functions to allocate the data. + void set_meta_data(int new_num_times, int new_width, int new_height); + void set_psi_scaling(float min_val, float max_val, float scale_val); + void set_phi_scaling(float min_val, float max_val, float scale_val); + + // Array pointer functions needed for CUDA memory management. Should ONLY be called + // by those utility functions. All other access should be done through the getters above. + inline void* get_cpu_array_ptr() { return cpu_array_ptr; } + inline void* get_gpu_array_ptr() { return gpu_array_ptr; } + inline void set_cpu_array_ptr(void* new_ptr) { cpu_array_ptr = new_ptr; } + inline void set_gpu_array_ptr(void* new_ptr) { gpu_array_ptr = new_ptr; } + +private: + inline int psi_index(int time, int row, int col) const { + return 2 * (pixels_per_image * time + row * width + col); + } + + inline int phi_index(int time, int row, int col) const { + return 2 * (pixels_per_image * time + row * width + col) + 1; + } + + // --- Attributes ----------------------------- + int num_times = 0; + int width = 0; + int height = 0; + int pixels_per_image = 0; + int num_entries = 0; + int block_size = 0; + long unsigned total_array_size = 0; + + // Pointers the CPU and GPU array. + void* cpu_array_ptr = nullptr; + void* gpu_array_ptr = nullptr; + + // Compression and scaling parameters of on GPU array. + int num_bytes = -1; // -1 (float), 1 (unit8) or 2 (unit16) + + float psi_min_val = FLT_MAX; + float psi_max_val = -FLT_MAX; + float psi_scale = 1.0; + + float phi_min_val = FLT_MAX; + float phi_max_val = -FLT_MAX; + float phi_scale = 1.0; +}; + +// read_encoded_psi_phi() is implemented in the header file so the CUDA files do not need +// to link against psi_phi_array.cpp. +inline PsiPhi PsiPhiArray::read_encoded_psi_phi(int time, int row, int col, bool from_gpu) { + void* array_ptr = (from_gpu) ? gpu_array_ptr : cpu_array_ptr; + assertm(array_ptr != nullptr, "Image data not allocated."); + + // Compute the in list index from the row, column, and time. + int start_index = psi_index(time, row, col); + assertm((start_index >= 0) && (start_index < num_entries), "Invalid index."); + + // Short circuit the typical case of float encoding. + // No scaling or shifting done. + PsiPhi result; + if (num_bytes == -1) { + result.psi = reinterpret_cast(array_ptr)[start_index]; + result.phi = reinterpret_cast(array_ptr)[start_index + 1]; + } else { + // Handle the compressed encodings. + float psi_value = (num_bytes == 1) ? (float)reinterpret_cast(array_ptr)[start_index] + : (float)reinterpret_cast(array_ptr)[start_index]; + result.psi = decode_uint_scalar(psi_value, psi_min_val, psi_scale); + + float phi_value = (num_bytes == 1) ? (float)reinterpret_cast(array_ptr)[start_index + 1] + : (float)reinterpret_cast(array_ptr)[start_index + 1]; + result.phi = decode_uint_scalar(phi_value, phi_min_val, phi_scale); + } + return result; +} + +} /* namespace search */ + +#endif /* PSI_PHI_ARRAY_DS_ */ diff --git a/src/kbmod/search/psi_phi_array_utils.h b/src/kbmod/search/psi_phi_array_utils.h new file mode 100644 index 00000000..29ec6801 --- /dev/null +++ b/src/kbmod/search/psi_phi_array_utils.h @@ -0,0 +1,34 @@ +/* + * psi_phi_array_utils.h + * + * The utility functions for the psi/phi array. Broken out from the header + * data structure so that it can use packages that won't be imported into the + * CUDA kernel, such as Eigen. + * + * Created on: Dec 8, 2023 + */ + +#ifndef PSI_PHI_ARRAY_UTILS_ +#define PSI_PHI_ARRAY_UTILS_ + +#include +#include +#include +#include + +#include "common.h" +#include "psi_phi_array_ds.h" +#include "raw_image.h" + +namespace search { + +// Compute the min, max, and scale parameter from the a vector of image data. +std::array compute_scale_params_from_image_vect(const std::vector& imgs, int num_bytes); + +void fill_psi_phi_array(PsiPhiArray& result_data, + const std::vector& psi_imgs, + const std::vector& phi_imgs); + +} /* namespace search */ + +#endif /* PSI_PHI_ARRAY_UTILS_ */ diff --git a/tests/test_psi_phi_array.py b/tests/test_psi_phi_array.py new file mode 100644 index 00000000..959100e9 --- /dev/null +++ b/tests/test_psi_phi_array.py @@ -0,0 +1,143 @@ +import unittest + +import numpy as np + +from kbmod.search import ( + KB_NO_DATA, + PsiPhiArray, + RawImage, + compute_scale_params_from_image_vect, + decode_uint_scalar, + encode_uint_scalar, +) + + +class test_psi_phi_array(unittest.TestCase): + def setUp(self): + self.num_times = 2 + self.width = 4 + self.height = 5 + + psi_1_vals = np.arange(0, self.width * self.height, dtype=np.single) + psi_1_arr = psi_1_vals.reshape(self.height, self.width) + self.psi_1 = RawImage(img=psi_1_arr, obs_time=1.0) + + psi_2_vals = np.arange(self.width * self.height, 2 * self.width * self.height, dtype=np.single) + psi_2_arr = psi_2_vals.reshape(self.height, self.width) + self.psi_2 = RawImage(img=psi_2_arr, obs_time=2.0) + + self.phi_1 = RawImage(np.full((self.height, self.width), 0.1, dtype=np.single), obs_time=1.0) + self.phi_2 = RawImage(np.full((self.height, self.width), 0.2, dtype=np.single), obs_time=2.0) + + def test_init_float(self): + arr = PsiPhiArray() + self.assertEqual(arr.num_times, 0) + self.assertEqual(arr.num_bytes, -1) + self.assertEqual(arr.width, 0) + self.assertEqual(arr.height, 0) + self.assertEqual(arr.pixels_per_image, 0) + self.assertEqual(arr.num_entries, 0) + self.assertEqual(arr.block_size, 4) + self.assertEqual(arr.total_array_size, 0) + + arr.set_meta_data(self.num_times, self.width, self.height) + self.assertEqual(arr.num_bytes, -1) + self.assertEqual(arr.block_size, 4) + self.assertEqual(arr.num_times, self.num_times) + self.assertEqual(arr.width, self.width) + self.assertEqual(arr.height, self.height) + self.assertEqual(arr.pixels_per_image, self.width * self.height) + self.assertEqual(arr.num_entries, 2 * self.width * self.height * self.num_times) + self.assertEqual(arr.total_array_size, 4 * arr.num_entries) + + def test_init_uint8(self): + arr = PsiPhiArray(1) + self.assertEqual(arr.num_times, 0) + self.assertEqual(arr.num_bytes, 1) + self.assertEqual(arr.width, 0) + self.assertEqual(arr.height, 0) + self.assertEqual(arr.pixels_per_image, 0) + self.assertEqual(arr.num_entries, 0) + self.assertEqual(arr.block_size, 1) + self.assertEqual(arr.total_array_size, 0) + + arr.set_meta_data(self.num_times, self.width, self.height) + self.assertEqual(arr.num_bytes, 1) + self.assertEqual(arr.block_size, 1) + self.assertEqual(arr.num_times, self.num_times) + self.assertEqual(arr.width, self.width) + self.assertEqual(arr.height, self.height) + self.assertEqual(arr.pixels_per_image, self.width * self.height) + self.assertEqual(arr.num_entries, 2 * self.width * self.height * self.num_times) + self.assertEqual(arr.total_array_size, 1 * arr.num_entries) + + def test_init_uint16(self): + arr = PsiPhiArray(2) + self.assertEqual(arr.num_times, 0) + self.assertEqual(arr.num_bytes, 2) + self.assertEqual(arr.width, 0) + self.assertEqual(arr.height, 0) + self.assertEqual(arr.pixels_per_image, 0) + self.assertEqual(arr.num_entries, 0) + self.assertEqual(arr.block_size, 2) + self.assertEqual(arr.total_array_size, 0) + + arr.set_meta_data(self.num_times, self.width, self.height) + self.assertEqual(arr.num_bytes, 2) + self.assertEqual(arr.block_size, 2) + self.assertEqual(arr.num_times, self.num_times) + self.assertEqual(arr.width, self.width) + self.assertEqual(arr.height, self.height) + self.assertEqual(arr.pixels_per_image, self.width * self.height) + self.assertEqual(arr.num_entries, 2 * self.width * self.height * self.num_times) + self.assertEqual(arr.total_array_size, 2 * arr.num_entries) + + def test_decode_uint_scalar(self): + self.assertAlmostEqual(decode_uint_scalar(1.0, 0.0, 5.0), 0.0) + self.assertAlmostEqual(decode_uint_scalar(2.0, 0.0, 5.0), 5.0) + self.assertAlmostEqual(decode_uint_scalar(3.0, 0.0, 5.0), 10.0) + + self.assertAlmostEqual(decode_uint_scalar(1.0, 2.5, 3.0), 2.5) + self.assertAlmostEqual(decode_uint_scalar(2.0, 2.5, 3.0), 5.5) + self.assertAlmostEqual(decode_uint_scalar(3.0, 2.5, 3.0), 8.5) + + # 0.0 always decodes to NO_DATA. + self.assertAlmostEqual(decode_uint_scalar(0.0, 1.0, 5.0), KB_NO_DATA) + + def encode_uint_scalar(self): + self.assertAlmostEqual(encode_uint_scalar(0.0, 0.0, 10.0, 0.1), 1.0) + self.assertAlmostEqual(encode_uint_scalar(0.1, 0.0, 10.0, 0.1), 2.0) + self.assertAlmostEqual(encode_uint_scalar(1.0, 0.0, 10.0, 0.1), 11.0) + self.assertAlmostEqual(encode_uint_scalar(2.0, 0.0, 10.0, 0.1), 21.0) + + # NO_DATA always encodes to 0.0. + self.assertAlmostEqual(encode_uint_scalar(KB_NO_DATA, 0.0, 10.0, 0.1), 0.0) + + # Test clipping. + self.assertAlmostEqual(encode_uint_scalar(11.0, 0.0, 10.0, 0.1), 100.0) + self.assertAlmostEqual(encode_uint_scalar(-100.0, 0.0, 10.0, 0.1), 1.0) + + def test_compute_scale_params_from_image_vect(self): + max_val = (2 * self.width * self.height - 1) + 1e-6 + + # Parameters for encoding to a float + result_float = compute_scale_params_from_image_vect([self.psi_1, self.psi_2], -1) + self.assertAlmostEqual(result_float[0], 0.0, delta=1e-5) + self.assertAlmostEqual(result_float[1], max_val, delta=1e-5) + self.assertAlmostEqual(result_float[2], 1.0, delta=1e-5) + + # Parameters for encoding to an uint8 + result_uint8 = compute_scale_params_from_image_vect([self.psi_1, self.psi_2], 1) + self.assertAlmostEqual(result_uint8[0], 0.0, delta=1e-5) + self.assertAlmostEqual(result_uint8[1], max_val, delta=1e-5) + self.assertAlmostEqual(result_uint8[2], max_val / 255.0, delta=1e-5) + + # Parameters for encoding to an uint16 + result_uint16 = compute_scale_params_from_image_vect([self.psi_1, self.psi_2], 2) + self.assertAlmostEqual(result_uint16[0], 0.0, delta=1e-5) + self.assertAlmostEqual(result_uint16[1], max_val, delta=1e-5) + self.assertAlmostEqual(result_uint16[2], max_val / 65535.0, delta=1e-5) + + +if __name__ == "__main__": + unittest.main() From d87dcbc12f3846c1d4921f0fdea5e58efe9eadbb Mon Sep 17 00:00:00 2001 From: Jeremy Kubica <104161096+jeremykubica@users.noreply.github.com> Date: Fri, 8 Dec 2023 11:37:41 -0500 Subject: [PATCH 2/7] Add on GPU allocation and tests --- src/kbmod/search/kernels.cu | 22 +++++++++---------- src/kbmod/search/psi_phi_array.cpp | 27 +++++++++++++++++++---- src/kbmod/search/psi_phi_array_ds.h | 3 +++ tests/test_psi_phi_array.py | 34 +++++++++++++++++++++++++++++ 4 files changed, 71 insertions(+), 15 deletions(-) diff --git a/src/kbmod/search/kernels.cu b/src/kbmod/search/kernels.cu index e26549e7..dcfc65a9 100644 --- a/src/kbmod/search/kernels.cu +++ b/src/kbmod/search/kernels.cu @@ -23,23 +23,23 @@ namespace search { -extern "C" void device_allocate_psi_phi_array(PsiPhiArray& data) { - assertm(data.get_cpu_array_ptr() != nullptr, "CPU array not allocated."); - assertm(data.get_gpu_array_ptr() == nullptr, "GPU array already allocated."); +extern "C" void device_allocate_psi_phi_array(PsiPhiArray* data) { + assertm(data->cpu_array_allocated(), "CPU array not allocated."); + assertm(data->gpu_array_allocated(), "GPU array already allocated."); void* array_ptr; - checkCudaErrors(cudaMalloc((void **)&array_ptr, data.get_total_array_size())); + checkCudaErrors(cudaMalloc((void **)&array_ptr, data->get_total_array_size())); checkCudaErrors(cudaMemcpy(array_ptr, - data.get_cpu_array_ptr(), - data.get_total_array_size(), + data->get_cpu_array_ptr(), + data->get_total_array_size(), cudaMemcpyHostToDevice)); - data.set_gpu_array_ptr(array_ptr); + data->set_gpu_array_ptr(array_ptr); } -extern "C" void device_free_psi_phi_array(PsiPhiArray& data) { - if (data.get_gpu_array_ptr() != nullptr) { - checkCudaErrors(cudaFree(data.get_gpu_array_ptr())); - data.set_gpu_array_ptr(nullptr); +extern "C" void device_free_psi_phi_array(PsiPhiArray* data) { + if (data->gpu_array_allocated()) { + checkCudaErrors(cudaFree(data->get_gpu_array_ptr())); + data->set_gpu_array_ptr(nullptr); } } diff --git a/src/kbmod/search/psi_phi_array.cpp b/src/kbmod/search/psi_phi_array.cpp index a281b81f..fac11112 100644 --- a/src/kbmod/search/psi_phi_array.cpp +++ b/src/kbmod/search/psi_phi_array.cpp @@ -5,9 +5,9 @@ namespace search { // Declaration of CUDA functions that will be linked in. #ifdef HAVE_CUDA -extern "C" void device_allocate_psi_phi_array(PsiPhiArray& data); +extern "C" void device_allocate_psi_phi_array(PsiPhiArray* data); -extern "C" void device_free_psi_phi_array(PsiPhiArray& data); +extern "C" void device_free_psi_phi_array(PsiPhiArray* data); #endif // ------------------------------------------------------- @@ -34,11 +34,17 @@ PsiPhiArray::~PsiPhiArray() { } void PsiPhiArray::clear() { - // Free all used memory. + // Free all used memory on CPU and GPU. if (cpu_array_ptr != nullptr) { free(cpu_array_ptr); cpu_array_ptr = nullptr; } +#ifdef HAVE_CUDA + if (gpu_array_ptr != nullptr) { + device_free_psi_phi_array(this); + gpu_array_ptr = nullptr; + } +#endif // Reset the meta data except the encoding information. num_times = 0; @@ -181,6 +187,11 @@ void fill_psi_phi_array(PsiPhiArray& result_data, } else { set_encode_cpu_psi_phi_array(result_data, psi_imgs, phi_imgs); } + +#ifdef HAVE_CUDA + // Create a copy of the encoded data in GPU memory. + device_allocate_psi_phi_array(&result_data); +#endif } // ------------------------------------------- @@ -190,7 +201,12 @@ void fill_psi_phi_array(PsiPhiArray& result_data, #ifdef Py_PYTHON_H static void psi_phi_array_binding(py::module& m) { using ppa = search::PsiPhiArray; - + + py::class_(m, "PsiPhi") + .def(py::init<>()) + .def_readwrite("psi", &search::PsiPhi::psi) + .def_readwrite("phi", &search::PsiPhi::phi); + py::class_(m, "PsiPhiArray") .def(py::init<>()) .def(py::init()) @@ -208,12 +224,15 @@ static void psi_phi_array_binding(py::module& m) { .def_property_readonly("phi_min_val", &ppa::get_phi_min_val) .def_property_readonly("phi_max_val", &ppa::get_phi_max_val) .def_property_readonly("phi_scale", &ppa::get_phi_scale) + .def_property_readonly("cpu_array_allocated", &ppa::cpu_array_allocated) + .def_property_readonly("gpu_array_allocated", &ppa::gpu_array_allocated) .def("set_meta_data", &ppa::set_meta_data) .def("clear", &ppa::clear) .def("read_encoded_psi_phi", &ppa::read_encoded_psi_phi); m.def("compute_scale_params_from_image_vect", &search::compute_scale_params_from_image_vect); m.def("decode_uint_scalar", &search::decode_uint_scalar); m.def("encode_uint_scalar", &search::encode_uint_scalar); + m.def("fill_psi_phi_array", &search::fill_psi_phi_array); } #endif diff --git a/src/kbmod/search/psi_phi_array_ds.h b/src/kbmod/search/psi_phi_array_ds.h index 2355cbd9..9741a93c 100644 --- a/src/kbmod/search/psi_phi_array_ds.h +++ b/src/kbmod/search/psi_phi_array_ds.h @@ -65,6 +65,9 @@ class PsiPhiArray { inline float get_phi_max_val() { return phi_max_val; } inline float get_phi_scale() { return phi_scale; } + inline bool cpu_array_allocated() { return cpu_array_ptr != nullptr; } + inline bool gpu_array_allocated() { return gpu_array_ptr != nullptr; } + // Primary getter function for interaction (read the data). inline PsiPhi read_encoded_psi_phi(int time, int row, int col, bool from_gpu); diff --git a/tests/test_psi_phi_array.py b/tests/test_psi_phi_array.py index 959100e9..5fc0251d 100644 --- a/tests/test_psi_phi_array.py +++ b/tests/test_psi_phi_array.py @@ -4,11 +4,13 @@ from kbmod.search import ( KB_NO_DATA, + PsiPhi, PsiPhiArray, RawImage, compute_scale_params_from_image_vect, decode_uint_scalar, encode_uint_scalar, + fill_psi_phi_array, ) @@ -138,6 +140,38 @@ def test_compute_scale_params_from_image_vect(self): self.assertAlmostEqual(result_uint16[1], max_val, delta=1e-5) self.assertAlmostEqual(result_uint16[2], max_val / 65535.0, delta=1e-5) + def test_fill_psi_phi_array(self): + arr = PsiPhiArray() + fill_psi_phi_array(arr, [self.psi_1, self.psi_2], [self.phi_1, self.phi_2]) + + # Check the meta data. + self.assertEqual(arr.num_times, self.num_times) + self.assertEqual(arr.num_bytes, -1) + self.assertEqual(arr.width, self.width) + self.assertEqual(arr.height, self.height) + self.assertEqual(arr.pixels_per_image, self.width * self.height) + self.assertEqual(arr.num_entries, 2 * arr.pixels_per_image * self.num_times) + self.assertEqual(arr.block_size, 4) + self.assertEqual(arr.total_array_size, arr.num_entries * arr.block_size) + + # Check that we can read the values from the CPU array. + self.assertTrue(arr.cpu_array_allocated) + self.assertTrue(arr.gpu_array_allocated) + + # Check that we can correctly read the values from the CPU. + for time in range(self.num_times): + offset = time * self.width * self.height + for row in range(self.height): + for col in range(self.width): + val = arr.read_encoded_psi_phi(time, row, col, False) + self.assertAlmostEqual(val.psi, offset + row * self.width + col, delta=1e-5) + self.assertAlmostEqual(val.phi, 0.1 * (time + 1)) + + # Check that the arrays are set to NULL after we clear it (memory should be freed too). + arr.clear() + self.assertFalse(arr.cpu_array_allocated) + self.assertFalse(arr.gpu_array_allocated) + if __name__ == "__main__": unittest.main() From f4c05341dca6ccaf3fe7e80dddeef63d8ee56341 Mon Sep 17 00:00:00 2001 From: Jeremy Kubica <104161096+jeremykubica@users.noreply.github.com> Date: Fri, 8 Dec 2023 12:51:43 -0500 Subject: [PATCH 3/7] Fix rollover issue in encoding --- src/kbmod/search/psi_phi_array.cpp | 16 ++++---- tests/test_psi_phi_array.py | 66 ++++++++++++++++-------------- 2 files changed, 44 insertions(+), 38 deletions(-) diff --git a/src/kbmod/search/psi_phi_array.cpp b/src/kbmod/search/psi_phi_array.cpp index fac11112..b7b95c57 100644 --- a/src/kbmod/search/psi_phi_array.cpp +++ b/src/kbmod/search/psi_phi_array.cpp @@ -109,13 +109,12 @@ std::array compute_scale_params_from_image_vect(const std::vector max_val) max_val = bnds[1]; } - // Increase max_value slightly to avoid the max value rolling over during encoding. - max_val += 1e-6; - // Set the scale if we are encoding the values. float scale = 1.0; if (num_bytes == 1 || num_bytes == 2) { float width = (max_val - min_val); + if (width < 1e-6) width = 1e-6; // Avoid a zero width. + long int num_values = (1 << (8 * num_bytes)) - 1; scale = width / (double)num_values; } @@ -129,6 +128,11 @@ void set_encode_cpu_psi_phi_array(PsiPhiArray& data, const std::vector assertm(data.get_cpu_array_ptr() != nullptr, "CPU PsiPhi already allocated."); T *encoded = (T *)malloc(data.get_total_array_size()); + // Create a safe maximum that is slightly less than the true max to avoid + // rollover of the unsigned integer. + float safe_max_psi = data.get_psi_max_val() - data.get_psi_scale() / 100.0; + float safe_max_phi = data.get_phi_max_val() - data.get_phi_scale() / 100.0; + int current_index = 0; int num_bytes = data.get_num_bytes(); for (int t = 0; t < data.get_num_times(); ++t) { @@ -139,10 +143,8 @@ void set_encode_cpu_psi_phi_array(PsiPhiArray& data, const std::vector // Handle the encoding for the different values. if (num_bytes == 1 || num_bytes == 2) { - psi_value = encode_uint_scalar(psi_value, data.get_psi_min_val(), data.get_psi_max_val(), - data.get_psi_scale()); - phi_value = encode_uint_scalar(phi_value, data.get_phi_min_val(), data.get_phi_max_val(), - data.get_phi_scale()); + psi_value = encode_uint_scalar(psi_value, data.get_psi_min_val(), safe_max_psi, data.get_psi_scale()); + phi_value = encode_uint_scalar(phi_value, data.get_phi_min_val(), safe_max_phi, data.get_phi_scale()); } encoded[current_index++] = static_cast(psi_value); diff --git a/tests/test_psi_phi_array.py b/tests/test_psi_phi_array.py index 5fc0251d..de0830c3 100644 --- a/tests/test_psi_phi_array.py +++ b/tests/test_psi_phi_array.py @@ -120,7 +120,7 @@ def encode_uint_scalar(self): self.assertAlmostEqual(encode_uint_scalar(-100.0, 0.0, 10.0, 0.1), 1.0) def test_compute_scale_params_from_image_vect(self): - max_val = (2 * self.width * self.height - 1) + 1e-6 + max_val = 2 * self.width * self.height - 1 # Parameters for encoding to a float result_float = compute_scale_params_from_image_vect([self.psi_1, self.psi_2], -1) @@ -141,36 +141,40 @@ def test_compute_scale_params_from_image_vect(self): self.assertAlmostEqual(result_uint16[2], max_val / 65535.0, delta=1e-5) def test_fill_psi_phi_array(self): - arr = PsiPhiArray() - fill_psi_phi_array(arr, [self.psi_1, self.psi_2], [self.phi_1, self.phi_2]) - - # Check the meta data. - self.assertEqual(arr.num_times, self.num_times) - self.assertEqual(arr.num_bytes, -1) - self.assertEqual(arr.width, self.width) - self.assertEqual(arr.height, self.height) - self.assertEqual(arr.pixels_per_image, self.width * self.height) - self.assertEqual(arr.num_entries, 2 * arr.pixels_per_image * self.num_times) - self.assertEqual(arr.block_size, 4) - self.assertEqual(arr.total_array_size, arr.num_entries * arr.block_size) - - # Check that we can read the values from the CPU array. - self.assertTrue(arr.cpu_array_allocated) - self.assertTrue(arr.gpu_array_allocated) - - # Check that we can correctly read the values from the CPU. - for time in range(self.num_times): - offset = time * self.width * self.height - for row in range(self.height): - for col in range(self.width): - val = arr.read_encoded_psi_phi(time, row, col, False) - self.assertAlmostEqual(val.psi, offset + row * self.width + col, delta=1e-5) - self.assertAlmostEqual(val.phi, 0.1 * (time + 1)) - - # Check that the arrays are set to NULL after we clear it (memory should be freed too). - arr.clear() - self.assertFalse(arr.cpu_array_allocated) - self.assertFalse(arr.gpu_array_allocated) + for num_bytes in [-1, 2]: + arr = PsiPhiArray(num_bytes) + fill_psi_phi_array(arr, [self.psi_1, self.psi_2], [self.phi_1, self.phi_2]) + + # Check the meta data. + self.assertEqual(arr.num_times, self.num_times) + self.assertEqual(arr.num_bytes, num_bytes) + self.assertEqual(arr.width, self.width) + self.assertEqual(arr.height, self.height) + self.assertEqual(arr.pixels_per_image, self.width * self.height) + self.assertEqual(arr.num_entries, 2 * arr.pixels_per_image * self.num_times) + if num_bytes == -1: + self.assertEqual(arr.block_size, 4) + else: + self.assertEqual(arr.block_size, num_bytes) + self.assertEqual(arr.total_array_size, arr.num_entries * arr.block_size) + + # Check that we can read the values from the CPU array. + self.assertTrue(arr.cpu_array_allocated) + self.assertTrue(arr.gpu_array_allocated) + + # Check that we can correctly read the values from the CPU. + for time in range(self.num_times): + offset = time * self.width * self.height + for row in range(self.height): + for col in range(self.width): + val = arr.read_encoded_psi_phi(time, row, col, False) + self.assertAlmostEqual(val.psi, offset + row * self.width + col, delta=0.05) + self.assertAlmostEqual(val.phi, 0.1 * (time + 1), delta=1e-5) + + # Check that the arrays are set to NULL after we clear it (memory should be freed too). + arr.clear() + self.assertFalse(arr.cpu_array_allocated) + self.assertFalse(arr.gpu_array_allocated) if __name__ == "__main__": From 0b880f5b1b96665227105f053e1c1aeca96d4ff4 Mon Sep 17 00:00:00 2001 From: Jeremy Kubica <104161096+jeremykubica@users.noreply.github.com> Date: Fri, 8 Dec 2023 13:58:32 -0500 Subject: [PATCH 4/7] Make num_bytes settable after PsiPhiArray created --- src/kbmod/search/psi_phi_array.cpp | 35 +++++++++++------------ src/kbmod/search/psi_phi_array_ds.h | 3 +- src/kbmod/search/psi_phi_array_utils.h | 1 + tests/test_psi_phi_array.py | 39 +++++++------------------- 4 files changed, 29 insertions(+), 49 deletions(-) diff --git a/src/kbmod/search/psi_phi_array.cpp b/src/kbmod/search/psi_phi_array.cpp index b7b95c57..cbfac390 100644 --- a/src/kbmod/search/psi_phi_array.cpp +++ b/src/kbmod/search/psi_phi_array.cpp @@ -14,21 +14,9 @@ extern "C" void device_free_psi_phi_array(PsiPhiArray* data); // --- Implementation of core data structure functions --- // ------------------------------------------------------- -PsiPhiArray::PsiPhiArray() : num_bytes(-1) { - block_size = sizeof(float); +PsiPhiArray::PsiPhiArray() { } - -PsiPhiArray::PsiPhiArray(int encode_bytes) : num_bytes(encode_bytes) { - if (num_bytes == 1) { - block_size = sizeof(uint8_t); - } else if (num_bytes == 2) { - block_size = sizeof(uint16_t); - } else { - num_bytes = -1; - block_size = sizeof(float); - } -} - + PsiPhiArray::~PsiPhiArray() { clear(); } @@ -63,11 +51,22 @@ void PsiPhiArray::clear() { phi_scale = 1.0; } -void PsiPhiArray::set_meta_data(int new_num_times, int new_width, int new_height) { +void PsiPhiArray::set_meta_data(int new_num_bytes, int new_num_times, int new_width, int new_height) { + assertm(new_num_bytes != -1 and new_num_bytes != 1 and new_num_bytes != 2 and new_num_bytes != 4, + "Invalid setting of num_bytes. Must be (-1, 1, 2, or 4)."); + num_bytes = new_num_bytes; + if (num_bytes == 1) { + block_size = sizeof(uint8_t); + } else if (num_bytes == 2) { + block_size = sizeof(uint16_t); + } else { + num_bytes = -1; + block_size = sizeof(float); + } + assertm(num_times > 0 && width > 0 && height > 0, "Invalid metadata provided to PsiPhiArray"); assertm(cpu_array_ptr == nullptr, "Cannot change meta data with allocated arrays. Call clear() first."); - num_times = new_num_times; width = new_width; height = new_height; @@ -158,6 +157,7 @@ void set_encode_cpu_psi_phi_array(PsiPhiArray& data, const std::vector } void fill_psi_phi_array(PsiPhiArray& result_data, + int num_bytes, const std::vector& psi_imgs, const std::vector& phi_imgs) { if (result_data.get_cpu_array_ptr() != nullptr) { @@ -171,7 +171,7 @@ void fill_psi_phi_array(PsiPhiArray& result_data, int width = phi_imgs[0].get_width(); int height = phi_imgs[0].get_height(); - result_data.set_meta_data(num_times, width, height); + result_data.set_meta_data(num_bytes, num_times, width, height); // Compute the scaling parameters. std::array psi_params = compute_scale_params_from_image_vect(psi_imgs, result_data.get_num_bytes()); @@ -211,7 +211,6 @@ static void psi_phi_array_binding(py::module& m) { py::class_(m, "PsiPhiArray") .def(py::init<>()) - .def(py::init()) .def_property_readonly("num_bytes", &ppa::get_num_bytes) .def_property_readonly("num_times", &ppa::get_num_times) .def_property_readonly("width", &ppa::get_width) diff --git a/src/kbmod/search/psi_phi_array_ds.h b/src/kbmod/search/psi_phi_array_ds.h index 9741a93c..b191f74f 100644 --- a/src/kbmod/search/psi_phi_array_ds.h +++ b/src/kbmod/search/psi_phi_array_ds.h @@ -43,7 +43,6 @@ inline float decode_uint_scalar(float value, float min_val, float scale) { class PsiPhiArray { public: explicit PsiPhiArray(); - explicit PsiPhiArray(int encode_bytes); virtual ~PsiPhiArray(); void clear(); @@ -72,7 +71,7 @@ class PsiPhiArray { inline PsiPhi read_encoded_psi_phi(int time, int row, int col, bool from_gpu); // Setters for the utility functions to allocate the data. - void set_meta_data(int new_num_times, int new_width, int new_height); + void set_meta_data(int new_num_bytes, int new_num_times, int new_width, int new_height); void set_psi_scaling(float min_val, float max_val, float scale_val); void set_phi_scaling(float min_val, float max_val, float scale_val); diff --git a/src/kbmod/search/psi_phi_array_utils.h b/src/kbmod/search/psi_phi_array_utils.h index 29ec6801..e0b56217 100644 --- a/src/kbmod/search/psi_phi_array_utils.h +++ b/src/kbmod/search/psi_phi_array_utils.h @@ -26,6 +26,7 @@ namespace search { std::array compute_scale_params_from_image_vect(const std::vector& imgs, int num_bytes); void fill_psi_phi_array(PsiPhiArray& result_data, + int num_bytes, const std::vector& psi_imgs, const std::vector& phi_imgs); diff --git a/tests/test_psi_phi_array.py b/tests/test_psi_phi_array.py index de0830c3..224f004f 100644 --- a/tests/test_psi_phi_array.py +++ b/tests/test_psi_phi_array.py @@ -31,7 +31,7 @@ def setUp(self): self.phi_1 = RawImage(np.full((self.height, self.width), 0.1, dtype=np.single), obs_time=1.0) self.phi_2 = RawImage(np.full((self.height, self.width), 0.2, dtype=np.single), obs_time=2.0) - def test_init_float(self): + def test_set_meta_data(self): arr = PsiPhiArray() self.assertEqual(arr.num_times, 0) self.assertEqual(arr.num_bytes, -1) @@ -39,10 +39,11 @@ def test_init_float(self): self.assertEqual(arr.height, 0) self.assertEqual(arr.pixels_per_image, 0) self.assertEqual(arr.num_entries, 0) - self.assertEqual(arr.block_size, 4) + self.assertEqual(arr.block_size, 0) self.assertEqual(arr.total_array_size, 0) - arr.set_meta_data(self.num_times, self.width, self.height) + # Make float + arr.set_meta_data(-1, self.num_times, self.width, self.height) self.assertEqual(arr.num_bytes, -1) self.assertEqual(arr.block_size, 4) self.assertEqual(arr.num_times, self.num_times) @@ -52,18 +53,8 @@ def test_init_float(self): self.assertEqual(arr.num_entries, 2 * self.width * self.height * self.num_times) self.assertEqual(arr.total_array_size, 4 * arr.num_entries) - def test_init_uint8(self): - arr = PsiPhiArray(1) - self.assertEqual(arr.num_times, 0) - self.assertEqual(arr.num_bytes, 1) - self.assertEqual(arr.width, 0) - self.assertEqual(arr.height, 0) - self.assertEqual(arr.pixels_per_image, 0) - self.assertEqual(arr.num_entries, 0) - self.assertEqual(arr.block_size, 1) - self.assertEqual(arr.total_array_size, 0) - - arr.set_meta_data(self.num_times, self.width, self.height) + # Make uint8 + arr.set_meta_data(1, self.num_times, self.width, self.height) self.assertEqual(arr.num_bytes, 1) self.assertEqual(arr.block_size, 1) self.assertEqual(arr.num_times, self.num_times) @@ -73,18 +64,8 @@ def test_init_uint8(self): self.assertEqual(arr.num_entries, 2 * self.width * self.height * self.num_times) self.assertEqual(arr.total_array_size, 1 * arr.num_entries) - def test_init_uint16(self): - arr = PsiPhiArray(2) - self.assertEqual(arr.num_times, 0) - self.assertEqual(arr.num_bytes, 2) - self.assertEqual(arr.width, 0) - self.assertEqual(arr.height, 0) - self.assertEqual(arr.pixels_per_image, 0) - self.assertEqual(arr.num_entries, 0) - self.assertEqual(arr.block_size, 2) - self.assertEqual(arr.total_array_size, 0) - - arr.set_meta_data(self.num_times, self.width, self.height) + # Make uint16 + arr.set_meta_data(2, self.num_times, self.width, self.height) self.assertEqual(arr.num_bytes, 2) self.assertEqual(arr.block_size, 2) self.assertEqual(arr.num_times, self.num_times) @@ -142,8 +123,8 @@ def test_compute_scale_params_from_image_vect(self): def test_fill_psi_phi_array(self): for num_bytes in [-1, 2]: - arr = PsiPhiArray(num_bytes) - fill_psi_phi_array(arr, [self.psi_1, self.psi_2], [self.phi_1, self.phi_2]) + arr = PsiPhiArray() + fill_psi_phi_array(arr, num_bytes, [self.psi_1, self.psi_2], [self.phi_1, self.phi_2]) # Check the meta data. self.assertEqual(arr.num_times, self.num_times) From 96af812b6f71ff7b6564a529c87289b59749a144 Mon Sep 17 00:00:00 2001 From: Jeremy Kubica <104161096+jeremykubica@users.noreply.github.com> Date: Mon, 11 Dec 2023 13:14:23 -0500 Subject: [PATCH 5/7] Incorporate into kernel search --- src/kbmod/search/kernels.cu | 261 +++++++---------------- src/kbmod/search/psi_phi_array.cpp | 277 +++++++++++++++---------- src/kbmod/search/psi_phi_array_ds.h | 132 +++++------- src/kbmod/search/psi_phi_array_utils.h | 8 +- src/kbmod/search/stack_search.cpp | 102 +-------- src/kbmod/search/stack_search.h | 10 +- tests/test_psi_phi_array.py | 13 +- 7 files changed, 307 insertions(+), 496 deletions(-) diff --git a/src/kbmod/search/kernels.cu b/src/kbmod/search/kernels.cu index dcfc65a9..157ae377 100644 --- a/src/kbmod/search/kernels.cu +++ b/src/kbmod/search/kernels.cu @@ -23,31 +23,35 @@ namespace search { -extern "C" void device_allocate_psi_phi_array(PsiPhiArray* data) { - assertm(data->cpu_array_allocated(), "CPU array not allocated."); - assertm(data->gpu_array_allocated(), "GPU array already allocated."); - - void* array_ptr; - checkCudaErrors(cudaMalloc((void **)&array_ptr, data->get_total_array_size())); - checkCudaErrors(cudaMemcpy(array_ptr, - data->get_cpu_array_ptr(), - data->get_total_array_size(), - cudaMemcpyHostToDevice)); - data->set_gpu_array_ptr(array_ptr); -} +__forceinline__ __device__ PsiPhi read_encoded_psi_phi(PsiPhiArrayMeta ¶ms, void *psi_phi_vect, int time, + int row, int col) { + // Bounds checking. + if ((row < 0) || (col < 0) || (row >= params.height) || (col >= params.width)) { + return {NO_DATA, NO_DATA}; + } -extern "C" void device_free_psi_phi_array(PsiPhiArray* data) { - if (data->gpu_array_allocated()) { - checkCudaErrors(cudaFree(data->get_gpu_array_ptr())); - data->set_gpu_array_ptr(nullptr); + // Compute the in-list index from the row, column, and time. + int start_index = 2 * (params.pixels_per_image * time + row * params.width + col); + if (params.num_bytes == -1) { + // Short circuit the typical case of float encoding. No scaling or shifting done. + return {reinterpret_cast(psi_phi_vect)[start_index], + reinterpret_cast(psi_phi_vect)[start_index + 1]}; } -} -// Read the pixel from the compressed array. -inline __device__ PsiPhi device_read_encoded_psi_phi(PsiPhiArray& data, int time, int row, int col) { - return data.read_encoded_psi_phi(time, row, col, true); -} + // Handle the compressed encodings. + PsiPhi result; + float psi_value = (params.num_bytes == 1) + ? (float)reinterpret_cast(psi_phi_vect)[start_index] + : (float)reinterpret_cast(psi_phi_vect)[start_index]; + result.psi = (psi_value == 0.0) ? NO_DATA : (psi_value - 1.0) * params.psi_scale + params.psi_min_val; + + float phi_value = (params.num_bytes == 1) + ? (float)reinterpret_cast(psi_phi_vect)[start_index + 1] + : (float)reinterpret_cast(psi_phi_vect)[start_index + 1]; + result.phi = (phi_value == 0.0) ? NO_DATA : (phi_value - 1.0) * params.phi_scale + params.phi_min_val; + return result; +} extern "C" __device__ __host__ void SigmaGFilteredIndicesCU(float *values, int num_values, float sgl0, float sgl1, float sigmag_coeff, float width, @@ -100,22 +104,17 @@ extern "C" __device__ __host__ void SigmaGFilteredIndicesCU(float *values, int n *max_keep_idx = end - 1; } -__device__ float ReadEncodedPixel(void *image_vect, int index, int n_bytes, const scaleParameters ¶ms) { - float value = (n_bytes == 1) ? (float)reinterpret_cast(image_vect)[index] - : (float)reinterpret_cast(image_vect)[index]; - float result = (value == 0.0) ? NO_DATA : (value - 1.0) * params.scale + params.min_val; - return result; -} - /* * Searches through images (represented as a flat array of floats) looking for most likely * trajectories in the given list. Outputs a results image of best trajectories. Returns a * fixed number of results per pixel specified by RESULTS_PER_PIXEL * filters results using a sigma_g-based filter and a central-moment filter. + * + * Creates a local copy of psi_phi_meta and params in local memory space. */ -__global__ void searchFilterImages(int num_images, int width, int height, void *psi_vect, void *phi_vect, - PerImageData image_data, SearchParameters params, int num_trajectories, - Trajectory *trajectories, Trajectory *results) { +__global__ void searchFilterImages(PsiPhiArrayMeta psi_phi_meta, void *psi_phi_vect, float *image_times, + SearchParameters params, int num_trajectories, Trajectory *trajectories, + Trajectory *results) { // Get the x and y coordinates within the search space. const int x_i = blockIdx.x * THREAD_DIM_X + threadIdx.x; const int y_i = blockIdx.y * THREAD_DIM_Y + threadIdx.y; @@ -130,7 +129,6 @@ __global__ void searchFilterImages(int num_images, int width, int height, void * // Get origin pixel for the trajectories in pixel space. const int x = x_i + params.x_start_min; const int y = y_i + params.y_start_min; - const unsigned int n_pixels = width * height; // Data structures used for filtering. float lc_array[MAX_NUM_IMAGES]; @@ -162,7 +160,7 @@ __global__ void searchFilterImages(int num_images, int width, int height, void * float phi_sum = 0.0; // Loop over each image and sample the appropriate pixel - for (int i = 0; i < num_images; ++i) { + for (int i = 0; i < psi_phi_meta.num_times; ++i) { lc_array[i] = 0; psi_array[i] = 0; phi_array[i] = 0; @@ -171,40 +169,21 @@ __global__ void searchFilterImages(int num_images, int width, int height, void * // Loop over each image and sample the appropriate pixel int num_seen = 0; - for (int i = 0; i < num_images; ++i) { + for (int i = 0; i < psi_phi_meta.num_times; ++i) { // Predict the trajectory's position. - float curr_time = image_data.image_times[i]; + float curr_time = image_times[i]; int current_x = x + int(curr_trj.vx * curr_time + 0.5); int current_y = y + int(curr_trj.vy * curr_time + 0.5); - // Test if trajectory goes out of the image, in which case we do not - // look up a pixel value for this time step (allowing trajectories to - // overlap the image for only some of the times). - if (current_x >= width || current_y >= height || current_x < 0 || current_y < 0) { - continue; - } - // Get the Psi and Phi pixel values. - unsigned int pixel_index = (n_pixels * i + current_y * width + current_x); - float curr_psi = (params.psi_num_bytes <= 0 || image_data.psi_params == nullptr) - ? reinterpret_cast(psi_vect)[pixel_index] - : ReadEncodedPixel(psi_vect, pixel_index, params.psi_num_bytes, - image_data.psi_params[i]); - if (curr_psi == NO_DATA) continue; - - float curr_phi = (params.phi_num_bytes <= 0 || image_data.phi_params == nullptr) - ? reinterpret_cast(phi_vect)[pixel_index] - : ReadEncodedPixel(phi_vect, pixel_index, params.phi_num_bytes, - image_data.phi_params[i]); - if (curr_phi == NO_DATA) continue; - - if (curr_psi != NO_DATA && curr_phi != NO_DATA) { + PsiPhi pixel_vals = read_encoded_psi_phi(psi_phi_meta, psi_phi_vect, i, current_y, current_x); + if (pixel_vals.psi != NO_DATA && pixel_vals.phi != NO_DATA) { curr_trj.obs_count++; - psi_sum += curr_psi; - phi_sum += curr_phi; - psi_array[num_seen] = curr_psi; - phi_array[num_seen] = curr_phi; - if (curr_phi != 0.0) lc_array[num_seen] = curr_psi / curr_phi; + psi_sum += pixel_vals.psi; + phi_sum += pixel_vals.phi; + psi_array[num_seen] = pixel_vals.psi; + phi_array[num_seen] = pixel_vals.phi; + if (pixel_vals.phi != 0.0) lc_array[num_seen] = pixel_vals.psi / pixel_vals.phi; num_seen += 1; } } @@ -260,137 +239,57 @@ __global__ void searchFilterImages(int num_images, int width, int height, void * } } -template -void *encodeImage(float *image_vect, int num_times, int num_pixels, scaleParameters *params, bool debug) { - void *device_vect = NULL; - - long unsigned int total_size = sizeof(T) * num_times * num_pixels; - if (debug) { - printf("Encoding image into %lu bytes/pixel for a total of %lu bytes.\n", sizeof(T), total_size); - } - - // Do the encoding locally first. - T *encoded = (T *)malloc(total_size); - for (int t = 0; t < num_times; ++t) { - float safe_max = params[t].max_val - params[t].scale / 100.0; - for (int p = 0; p < num_pixels; ++p) { - int index = t * num_pixels + p; - float value = image_vect[index]; - if (value == NO_DATA) { - encoded[index] = 0; - } else { - value = min(value, safe_max); - value = max(value, params[t].min_val); - value = (value - params[t].min_val) / params[t].scale + 1.0; - encoded[index] = static_cast(value); - } - } - } - - // Allocate the space on device and do a direct copy. - checkCudaErrors(cudaMalloc((void **)&device_vect, total_size)); - checkCudaErrors(cudaMemcpy(device_vect, encoded, total_size, cudaMemcpyHostToDevice)); - - // Free the local space. - free(encoded); - - return device_vect; -} - -void *encodeImageFloat(float *image_vect, unsigned int vectLength, bool debug) { - void *device_vect = NULL; - long unsigned int total_size = sizeof(float) * vectLength; - - if (debug) { - printf("Encoding image as float for a total of %lu bytes.\n", total_size); - } - - checkCudaErrors(cudaMalloc((void **)&device_vect, total_size)); - checkCudaErrors(cudaMemcpy(device_vect, image_vect, total_size, cudaMemcpyHostToDevice)); - return device_vect; -} - - - -extern "C" void deviceSearchFilter(int num_images, int width, int height, float *psi_vect, float *phi_vect, - PerImageData img_data, SearchParameters params, int num_trajectories, - Trajectory *trj_to_search, int num_results, Trajectory *best_results) { +extern "C" void deviceSearchFilter(PsiPhiArray &psi_phi_array, float *image_times, SearchParameters params, + int num_trajectories, Trajectory *trj_to_search, int num_results, + Trajectory *best_results) { // Allocate Device memory Trajectory *device_tests; float *device_img_times; - void *device_psi; - void *device_phi; + void *device_psi_phi; Trajectory *device_search_results; - scaleParameters *device_psi_params = nullptr; - scaleParameters *device_phi_params = nullptr; // Check the hard coded maximum number of images against the num_images. + int num_images = psi_phi_array.get_num_times(); if (num_images > MAX_NUM_IMAGES) { throw std::runtime_error("Number of images exceeds GPU maximum."); } + // Allocate and copy the device_psi_phi_vector. + if (psi_phi_array.cpu_array_allocated() == false) { + throw std::runtime_error("PsiPhi data has not been created."); + } + if (params.debug) { + printf("Allocating %lu bytes for GPU psi/phi data (%i images, %i pixels per image).\n", + psi_phi_array.get_total_array_size(), psi_phi_array.get_num_times(), + psi_phi_array.get_pixels_per_image()); + } + checkCudaErrors(cudaMalloc((void **)&device_psi_phi, psi_phi_array.get_total_array_size())); + checkCudaErrors(cudaMemcpy(device_psi_phi, psi_phi_array.get_cpu_array_ptr(), + psi_phi_array.get_total_array_size(), cudaMemcpyHostToDevice)); + + // Copy trajectories to search if (params.debug) { - printf("Allocating %lu bytes for testing grid.\n", sizeof(Trajectory) * num_trajectories); + printf("Allocating %lu bytes testing grid with %i elements.\n", sizeof(Trajectory) * num_trajectories, + num_trajectories); } checkCudaErrors(cudaMalloc((void **)&device_tests, sizeof(Trajectory) * num_trajectories)); + checkCudaErrors(cudaMemcpy(device_tests, trj_to_search, sizeof(Trajectory) * num_trajectories, + cudaMemcpyHostToDevice)); + // Copy the time vector. if (params.debug) { printf("Allocating %lu bytes for time data.\n", sizeof(float) * num_images); } checkCudaErrors(cudaMalloc((void **)&device_img_times, sizeof(float) * num_images)); + checkCudaErrors( + cudaMemcpy(device_img_times, image_times, sizeof(float) * num_images, cudaMemcpyHostToDevice)); + // Allocate space for the results. if (params.debug) { - printf("Allocating %lu bytes for testing grid.\n", sizeof(Trajectory) * num_trajectories); + printf("Allocating %lu bytes for %i results.\n", sizeof(Trajectory) * num_results, num_results); } checkCudaErrors(cudaMalloc((void **)&device_search_results, sizeof(Trajectory) * num_results)); - // Copy trajectories to search - checkCudaErrors(cudaMemcpy(device_tests, trj_to_search, sizeof(Trajectory) * num_trajectories, - cudaMemcpyHostToDevice)); - - // Copy image times - checkCudaErrors(cudaMemcpy(device_img_times, img_data.image_times, sizeof(float) * num_images, - cudaMemcpyHostToDevice)); - - // Copy (and encode) the images. Also copy over the scaling parameters if needed. - if ((params.psi_num_bytes == 1 || params.psi_num_bytes == 2) && (img_data.psi_params != nullptr)) { - checkCudaErrors(cudaMalloc((void **)&device_psi_params, num_images * sizeof(scaleParameters))); - checkCudaErrors(cudaMemcpy(device_psi_params, img_data.psi_params, - num_images * sizeof(scaleParameters), cudaMemcpyHostToDevice)); - if (params.psi_num_bytes == 1) { - device_psi = encodeImage(psi_vect, num_images, width * height, img_data.psi_params, - params.debug); - } else { - device_psi = encodeImage(psi_vect, num_images, width * height, img_data.psi_params, - params.debug); - } - } else { - device_psi = encodeImageFloat(psi_vect, num_images * width * height, params.debug); - } - if ((params.phi_num_bytes == 1 || params.phi_num_bytes == 2) && (img_data.phi_params != nullptr)) { - checkCudaErrors(cudaMalloc((void **)&device_phi_params, num_images * sizeof(scaleParameters))); - checkCudaErrors(cudaMemcpy(device_phi_params, img_data.phi_params, - num_images * sizeof(scaleParameters), cudaMemcpyHostToDevice)); - if (params.phi_num_bytes == 1) { - device_phi = encodeImage(phi_vect, num_images, width * height, img_data.phi_params, - params.debug); - } else { - device_phi = encodeImage(phi_vect, num_images, width * height, img_data.phi_params, - params.debug); - } - } else { - device_phi = encodeImageFloat(phi_vect, num_images * width * height, params.debug); - } - - // Wrap the per-image data into a struct. This struct will be copied by value - // during the function call, so we don't need to allocate memory for the - // struct itself. We just set the pointers to the on device vectors. - PerImageData device_image_data; - device_image_data.num_images = num_images; - device_image_data.image_times = device_img_times; - device_image_data.psi_params = device_psi_params; - device_image_data.phi_params = device_phi_params; - // Compute the range of starting pixels to use when setting the blocks and threads. // We use the width and height of the search space (as opposed to the image width // and height), meaning the blocks/threads will be indexed relative to the search space. @@ -400,21 +299,17 @@ extern "C" void deviceSearchFilter(int num_images, int width, int height, float dim3 threads(THREAD_DIM_X, THREAD_DIM_Y); // Launch Search - searchFilterImages<<>>(num_images, width, height, device_psi, device_phi, - device_image_data, params, num_trajectories, device_tests, - device_search_results); + searchFilterImages<<>>(psi_phi_array.get_meta_data(), device_psi_phi, device_img_times, + params, num_trajectories, device_tests, device_search_results); // Read back results checkCudaErrors(cudaMemcpy(best_results, device_search_results, sizeof(Trajectory) * num_results, cudaMemcpyDeviceToHost)); // Free the on GPU memory. - if (device_phi_params != nullptr) checkCudaErrors(cudaFree(device_phi_params)); - if (device_psi_params != nullptr) checkCudaErrors(cudaFree(device_psi_params)); - checkCudaErrors(cudaFree(device_phi)); - checkCudaErrors(cudaFree(device_psi)); checkCudaErrors(cudaFree(device_search_results)); checkCudaErrors(cudaFree(device_img_times)); + checkCudaErrors(cudaFree(device_psi_phi)); checkCudaErrors(cudaFree(device_tests)); } @@ -514,16 +409,10 @@ __global__ void deviceGetCoaddStamp(int num_images, int width, int height, float results[trj_offset + pixel_index] = result; } -void deviceGetCoadds(const unsigned int num_images, - const unsigned int width, - const unsigned int height, - std::vector data_refs, - PerImageData image_data, - int num_trajectories, - Trajectory *trajectories, - StampParameters params, - std::vector> &use_index_vect, - float *results) { +void deviceGetCoadds(const unsigned int num_images, const unsigned int width, const unsigned int height, + std::vector data_refs, PerImageData image_data, int num_trajectories, + Trajectory *trajectories, StampParameters params, + std::vector> &use_index_vect, float *results) { // Allocate Device memory Trajectory *device_trjs; int *device_use_index = nullptr; diff --git a/src/kbmod/search/psi_phi_array.cpp b/src/kbmod/search/psi_phi_array.cpp index cbfac390..e119dc05 100644 --- a/src/kbmod/search/psi_phi_array.cpp +++ b/src/kbmod/search/psi_phi_array.cpp @@ -3,22 +3,16 @@ namespace search { -// Declaration of CUDA functions that will be linked in. -#ifdef HAVE_CUDA -extern "C" void device_allocate_psi_phi_array(PsiPhiArray* data); - -extern "C" void device_free_psi_phi_array(PsiPhiArray* data); -#endif - // ------------------------------------------------------- // --- Implementation of core data structure functions --- // ------------------------------------------------------- -PsiPhiArray::PsiPhiArray() { -} +PsiPhiArray::PsiPhiArray() {} PsiPhiArray::~PsiPhiArray() { - clear(); + if (cpu_array_ptr != nullptr) { + free(cpu_array_ptr); + } } void PsiPhiArray::clear() { @@ -27,68 +21,104 @@ void PsiPhiArray::clear() { free(cpu_array_ptr); cpu_array_ptr = nullptr; } -#ifdef HAVE_CUDA - if (gpu_array_ptr != nullptr) { - device_free_psi_phi_array(this); - gpu_array_ptr = nullptr; - } -#endif // Reset the meta data except the encoding information. - num_times = 0; - width = 0; - height = 0; - pixels_per_image = 0; - num_entries = 0; - total_array_size = 0; - - psi_min_val = FLT_MAX; - psi_max_val = -FLT_MAX; - psi_scale = 1.0; - - phi_min_val = FLT_MAX; - phi_max_val = -FLT_MAX; - phi_scale = 1.0; + meta_data.num_times = 0; + meta_data.width = 0; + meta_data.height = 0; + meta_data.pixels_per_image = 0; + meta_data.num_entries = 0; + meta_data.total_array_size = 0; + + meta_data.psi_min_val = FLT_MAX; + meta_data.psi_max_val = -FLT_MAX; + meta_data.psi_scale = 1.0; + + meta_data.phi_min_val = FLT_MAX; + meta_data.phi_max_val = -FLT_MAX; + meta_data.phi_scale = 1.0; } -void PsiPhiArray::set_meta_data(int new_num_bytes, int new_num_times, int new_width, int new_height) { - assertm(new_num_bytes != -1 and new_num_bytes != 1 and new_num_bytes != 2 and new_num_bytes != 4, - "Invalid setting of num_bytes. Must be (-1, 1, 2, or 4)."); - num_bytes = new_num_bytes; - if (num_bytes == 1) { - block_size = sizeof(uint8_t); - } else if (num_bytes == 2) { - block_size = sizeof(uint16_t); +void PsiPhiArray::set_meta_data(int new_num_bytes, int new_num_times, int new_height, int new_width) { + // Validity checking of parameters. + if (new_num_bytes != -1 && new_num_bytes != 1 && new_num_bytes != 2 && new_num_bytes != 4) { + throw std::runtime_error("Invalid setting of num_bytes. Must be (-1, 1, 2, or 4)."); + } + if (new_num_times <= 0) throw std::runtime_error("Invalid num_times passed to set_meta_data.\n"); + if (new_width <= 0) throw std::runtime_error("Invalid width passed to set_meta_data.\n"); + if (new_height <= 0) throw std::runtime_error("Invalid height passed to set_meta_data.\n"); + + // Check that we do not have an array allocated. + if (cpu_array_ptr != nullptr) { + throw std::runtime_error("Cannot change meta data with allocated arrays. Call clear() first."); + } + + meta_data.num_bytes = new_num_bytes; + if (meta_data.num_bytes == 1) { + meta_data.block_size = sizeof(uint8_t); + } else if (meta_data.num_bytes == 2) { + meta_data.block_size = sizeof(uint16_t); } else { - num_bytes = -1; - block_size = sizeof(float); + meta_data.num_bytes = -1; + meta_data.block_size = sizeof(float); } - assertm(num_times > 0 && width > 0 && height > 0, - "Invalid metadata provided to PsiPhiArray"); - assertm(cpu_array_ptr == nullptr, "Cannot change meta data with allocated arrays. Call clear() first."); - num_times = new_num_times; - width = new_width; - height = new_height; - pixels_per_image = width * height; - num_entries = 2 * pixels_per_image * num_times; - total_array_size = block_size * num_entries; + meta_data.num_times = new_num_times; + meta_data.width = new_width; + meta_data.height = new_height; + meta_data.pixels_per_image = meta_data.width * meta_data.height; + meta_data.num_entries = 2 * meta_data.pixels_per_image * meta_data.num_times; + meta_data.total_array_size = meta_data.block_size * meta_data.num_entries; } void PsiPhiArray::set_psi_scaling(float min_val, float max_val, float scale_val) { - assertm(min_val < max_val, "Min value needs to be < max value"); - assertm(scale_val > 0.0, "Scale value must be greater than zero."); - psi_min_val = min_val; - psi_max_val = max_val; - psi_scale = scale_val; + if (min_val > max_val) throw std::runtime_error("Min value needs to be < max value"); + if (scale_val <= 0) throw std::runtime_error("Scale value must be greater than zero."); + meta_data.psi_min_val = min_val; + meta_data.psi_max_val = max_val; + meta_data.psi_scale = scale_val; } void PsiPhiArray::set_phi_scaling(float min_val, float max_val, float scale_val) { - assertm(min_val < max_val, "Min value needs to be < max value"); - assertm(scale_val > 0.0, "Scale value must be greater than zero."); - phi_min_val = min_val; - phi_max_val = max_val; - phi_scale = scale_val; + if (min_val > max_val) throw std::runtime_error("Min value needs to be < max value"); + if (scale_val <= 0) throw std::runtime_error("Scale value must be greater than zero."); + meta_data.phi_min_val = min_val; + meta_data.phi_max_val = max_val; + meta_data.phi_scale = scale_val; +} + +PsiPhi PsiPhiArray::read_psi_phi(int time, int row, int col) { + PsiPhi result = {NO_DATA, NO_DATA}; + + // Array allocation and bounds checking. + if ((cpu_array_ptr == nullptr) || (row < 0) || (col < 0) || (row >= meta_data.height) || + (col >= meta_data.width)) { + return result; + } + + // Compute the in-list index from the row, column, and time. + int start_index = 2 * (meta_data.pixels_per_image * time + row * meta_data.width + col); + + if (meta_data.num_bytes == -1) { + // Short circuit the typical case of float encoding. + // No scaling or shifting done. + result.psi = reinterpret_cast(cpu_array_ptr)[start_index]; + result.phi = reinterpret_cast(cpu_array_ptr)[start_index + 1]; + } else { + // Handle the compressed encodings. + float psi_value = (meta_data.num_bytes == 1) + ? (float)reinterpret_cast(cpu_array_ptr)[start_index] + : (float)reinterpret_cast(cpu_array_ptr)[start_index]; + result.psi = (psi_value == 0.0) ? NO_DATA + : (psi_value - 1.0) * meta_data.psi_scale + meta_data.psi_min_val; + + float phi_value = (meta_data.num_bytes == 1) + ? (float)reinterpret_cast(cpu_array_ptr)[start_index + 1] + : (float)reinterpret_cast(cpu_array_ptr)[start_index + 1]; + result.phi = (phi_value == 0.0) ? NO_DATA + : (phi_value - 1.0) * meta_data.phi_scale + meta_data.phi_min_val; + } + return result; } // ------------------------------------------- @@ -96,7 +126,7 @@ void PsiPhiArray::set_phi_scaling(float min_val, float max_val, float scale_val) // ------------------------------------------- // Compute the min, max, and scale parameter from the a vector of image data. -std::array compute_scale_params_from_image_vect(const std::vector& imgs, int num_bytes) { +std::array compute_scale_params_from_image_vect(const std::vector& imgs, int num_bytes) { int num_images = imgs.size(); // Do a linear pass through the data to compute the scaling parameters for psi and phi. @@ -122,10 +152,12 @@ std::array compute_scale_params_from_image_vect(const std::vector -void set_encode_cpu_psi_phi_array(PsiPhiArray& data, const std::vector& psi_imgs, - const std::vector& phi_imgs) { - assertm(data.get_cpu_array_ptr() != nullptr, "CPU PsiPhi already allocated."); - T *encoded = (T *)malloc(data.get_total_array_size()); +void set_encode_cpu_psi_phi_array(PsiPhiArray& data, const std::vector& psi_imgs, + const std::vector& phi_imgs) { + if (data.get_cpu_array_ptr() != nullptr) { + throw std::runtime_error("CPU PsiPhi already allocated."); + } + T* encoded = (T*)malloc(data.get_total_array_size()); // Create a safe maximum that is slightly less than the true max to avoid // rollover of the unsigned integer. @@ -142,13 +174,14 @@ void set_encode_cpu_psi_phi_array(PsiPhiArray& data, const std::vector // Handle the encoding for the different values. if (num_bytes == 1 || num_bytes == 2) { - psi_value = encode_uint_scalar(psi_value, data.get_psi_min_val(), safe_max_psi, data.get_psi_scale()); - phi_value = encode_uint_scalar(phi_value, data.get_phi_min_val(), safe_max_phi, data.get_phi_scale()); + psi_value = encode_uint_scalar(psi_value, data.get_psi_min_val(), safe_max_psi, + data.get_psi_scale()); + phi_value = encode_uint_scalar(phi_value, data.get_phi_min_val(), safe_max_phi, + data.get_phi_scale()); } encoded[current_index++] = static_cast(psi_value); encoded[current_index++] = static_cast(phi_value); - assertm(current_index <= data.get_num_entries(), "Out of bounds write."); } } } @@ -156,9 +189,27 @@ void set_encode_cpu_psi_phi_array(PsiPhiArray& data, const std::vector data.set_cpu_array_ptr((void*)encoded); } -void fill_psi_phi_array(PsiPhiArray& result_data, - int num_bytes, - const std::vector& psi_imgs, +void set_float_cpu_psi_phi_array(PsiPhiArray& data, const std::vector& psi_imgs, + const std::vector& phi_imgs) { + if (data.get_cpu_array_ptr() != nullptr) { + throw std::runtime_error("CPU PsiPhi already allocated."); + } + float* encoded = (float*)malloc(data.get_total_array_size()); + + int current_index = 0; + for (int t = 0; t < data.get_num_times(); ++t) { + for (int row = 0; row < data.get_height(); ++row) { + for (int col = 0; col < data.get_width(); ++col) { + encoded[current_index++] = psi_imgs[t].get_pixel({row, col}); + encoded[current_index++] = phi_imgs[t].get_pixel({row, col}); + } + } + } + + data.set_cpu_array_ptr((void*)encoded); +} + +void fill_psi_phi_array(PsiPhiArray& result_data, int num_bytes, const std::vector& psi_imgs, const std::vector& phi_imgs) { if (result_data.get_cpu_array_ptr() != nullptr) { return; @@ -166,34 +217,33 @@ void fill_psi_phi_array(PsiPhiArray& result_data, // Set the meta data and do a bunch of validity checks. int num_times = psi_imgs.size(); - assertm(num_times > 0, "Trying to fill PsiPhi from empty vectors."); - assertm(num_times = phi_imgs.size(), "Size mismatch between psi and phi."); + if (num_times <= 0) throw std::runtime_error("Trying to fill PsiPhi from empty vectors."); + if (num_times != phi_imgs.size()) throw std::runtime_error("Size mismatch between psi and phi."); int width = phi_imgs[0].get_width(); int height = phi_imgs[0].get_height(); - result_data.set_meta_data(num_bytes, num_times, width, height); - - // Compute the scaling parameters. - std::array psi_params = compute_scale_params_from_image_vect(psi_imgs, result_data.get_num_bytes()); - result_data.set_psi_scaling(psi_params[0], psi_params[1], psi_params[2]); - - std::array phi_params = compute_scale_params_from_image_vect(phi_imgs, result_data.get_num_bytes()); - result_data.set_phi_scaling(phi_params[0], phi_params[1], phi_params[2]); - - // Compute the memory footprint for the arrays and do the local encoding. - - if (result_data.get_num_bytes() == 1) { - set_encode_cpu_psi_phi_array(result_data, psi_imgs, phi_imgs); - } else if (result_data.get_num_bytes() == 2) { - set_encode_cpu_psi_phi_array(result_data, psi_imgs, phi_imgs); - } else { - set_encode_cpu_psi_phi_array(result_data, psi_imgs, phi_imgs); + result_data.set_meta_data(num_bytes, num_times, height, width); + + if (result_data.get_num_bytes() == 1 || result_data.get_num_bytes() == 2) { + // Compute the scaling parameters needed for encoding. + std::array psi_params = + compute_scale_params_from_image_vect(psi_imgs, result_data.get_num_bytes()); + result_data.set_psi_scaling(psi_params[0], psi_params[1], psi_params[2]); + + std::array phi_params = + compute_scale_params_from_image_vect(phi_imgs, result_data.get_num_bytes()); + result_data.set_phi_scaling(phi_params[0], phi_params[1], phi_params[2]); + + // Do the local encoding. + if (result_data.get_num_bytes() == 1) { + set_encode_cpu_psi_phi_array(result_data, psi_imgs, phi_imgs); + } else { + set_encode_cpu_psi_phi_array(result_data, psi_imgs, phi_imgs); + } + } else { + // Just interleave psi and phi images. + set_float_cpu_psi_phi_array(result_data, psi_imgs, phi_imgs); } - -#ifdef HAVE_CUDA - // Create a copy of the encoded data in GPU memory. - device_allocate_psi_phi_array(&result_data); -#endif } // ------------------------------------------- @@ -210,26 +260,25 @@ static void psi_phi_array_binding(py::module& m) { .def_readwrite("phi", &search::PsiPhi::phi); py::class_(m, "PsiPhiArray") - .def(py::init<>()) - .def_property_readonly("num_bytes", &ppa::get_num_bytes) - .def_property_readonly("num_times", &ppa::get_num_times) - .def_property_readonly("width", &ppa::get_width) - .def_property_readonly("height", &ppa::get_height) - .def_property_readonly("pixels_per_image", &ppa::get_pixels_per_image) - .def_property_readonly("num_entries", &ppa::get_num_entries) - .def_property_readonly("total_array_size", &ppa::get_total_array_size) - .def_property_readonly("block_size", &ppa::get_block_size) - .def_property_readonly("psi_min_val", &ppa::get_psi_min_val) - .def_property_readonly("psi_max_val", &ppa::get_psi_max_val) - .def_property_readonly("psi_scale", &ppa::get_psi_scale) - .def_property_readonly("phi_min_val", &ppa::get_phi_min_val) - .def_property_readonly("phi_max_val", &ppa::get_phi_max_val) - .def_property_readonly("phi_scale", &ppa::get_phi_scale) - .def_property_readonly("cpu_array_allocated", &ppa::cpu_array_allocated) - .def_property_readonly("gpu_array_allocated", &ppa::gpu_array_allocated) - .def("set_meta_data", &ppa::set_meta_data) - .def("clear", &ppa::clear) - .def("read_encoded_psi_phi", &ppa::read_encoded_psi_phi); + .def(py::init<>()) + .def_property_readonly("num_bytes", &ppa::get_num_bytes) + .def_property_readonly("num_times", &ppa::get_num_times) + .def_property_readonly("width", &ppa::get_width) + .def_property_readonly("height", &ppa::get_height) + .def_property_readonly("pixels_per_image", &ppa::get_pixels_per_image) + .def_property_readonly("num_entries", &ppa::get_num_entries) + .def_property_readonly("total_array_size", &ppa::get_total_array_size) + .def_property_readonly("block_size", &ppa::get_block_size) + .def_property_readonly("psi_min_val", &ppa::get_psi_min_val) + .def_property_readonly("psi_max_val", &ppa::get_psi_max_val) + .def_property_readonly("psi_scale", &ppa::get_psi_scale) + .def_property_readonly("phi_min_val", &ppa::get_phi_min_val) + .def_property_readonly("phi_max_val", &ppa::get_phi_max_val) + .def_property_readonly("phi_scale", &ppa::get_phi_scale) + .def_property_readonly("cpu_array_allocated", &ppa::cpu_array_allocated) + .def("set_meta_data", &ppa::set_meta_data) + .def("clear", &ppa::clear) + .def("read_psi_phi", &ppa::read_psi_phi); m.def("compute_scale_params_from_image_vect", &search::compute_scale_params_from_image_vect); m.def("decode_uint_scalar", &search::decode_uint_scalar); m.def("encode_uint_scalar", &search::encode_uint_scalar); diff --git a/src/kbmod/search/psi_phi_array_ds.h b/src/kbmod/search/psi_phi_array_ds.h index b191f74f..15b573d5 100644 --- a/src/kbmod/search/psi_phi_array_ds.h +++ b/src/kbmod/search/psi_phi_array_ds.h @@ -36,9 +36,33 @@ inline float decode_uint_scalar(float value, float min_val, float scale) { return (value == 0.0) ? NO_DATA : (value - 1.0) * scale + min_val; } -/* PsiPhiArray is a class to hold the psi and phi arrays for the GPU - as well as functions to do encoding and decoding. - All functions used by CUDA are inlined and defined in the header. +// The struct of meta data for the PsiPhiArray. +struct PsiPhiArrayMeta { + int num_times = 0; + int width = 0; + int height = 0; + int pixels_per_image = 0; + int num_entries = 0; + int block_size = 0; + long unsigned total_array_size = 0; + + // Pointers the array (CPU space) + void* cpu_array_ptr = nullptr; + + // Compression and scaling parameters of on GPU array. + int num_bytes = -1; // -1 (float), 1 (unit8) or 2 (unit16) + + float psi_min_val = FLT_MAX; + float psi_max_val = -FLT_MAX; + float psi_scale = 1.0; + + float phi_min_val = FLT_MAX; + float phi_max_val = -FLT_MAX; + float phi_scale = 1.0; +}; + +/* PsiPhiArray is a class to hold the psi and phi arrays for the CPU and GPU as well as + the meta data and functions to do encoding and decoding on CPU. */ class PsiPhiArray { public: @@ -47,104 +71,46 @@ class PsiPhiArray { void clear(); + inline PsiPhiArrayMeta& get_meta_data() { return meta_data; } + // --- Getter functions (for Python interface) ---------------- - inline int get_num_bytes() { return num_bytes; } - inline int get_num_times() { return num_times; } - inline int get_width() { return width; } - inline int get_height() { return height; } - inline int get_pixels_per_image() { return pixels_per_image; } - inline int get_num_entries() { return num_entries; } - inline int get_total_array_size() { return total_array_size; } - inline int get_block_size() { return block_size; } - - inline float get_psi_min_val() { return psi_min_val; } - inline float get_psi_max_val() { return psi_max_val; } - inline float get_psi_scale() { return psi_scale; } - inline float get_phi_min_val() { return phi_min_val; } - inline float get_phi_max_val() { return phi_max_val; } - inline float get_phi_scale() { return phi_scale; } + inline int get_num_bytes() { return meta_data.num_bytes; } + inline int get_num_times() { return meta_data.num_times; } + inline int get_width() { return meta_data.width; } + inline int get_height() { return meta_data.height; } + inline int get_pixels_per_image() { return meta_data.pixels_per_image; } + inline int get_num_entries() { return meta_data.num_entries; } + inline long unsigned get_total_array_size() { return meta_data.total_array_size; } + inline int get_block_size() { return meta_data.block_size; } + + inline float get_psi_min_val() { return meta_data.psi_min_val; } + inline float get_psi_max_val() { return meta_data.psi_max_val; } + inline float get_psi_scale() { return meta_data.psi_scale; } + inline float get_phi_min_val() { return meta_data.phi_min_val; } + inline float get_phi_max_val() { return meta_data.phi_max_val; } + inline float get_phi_scale() { return meta_data.phi_scale; } inline bool cpu_array_allocated() { return cpu_array_ptr != nullptr; } - inline bool gpu_array_allocated() { return gpu_array_ptr != nullptr; } // Primary getter function for interaction (read the data). - inline PsiPhi read_encoded_psi_phi(int time, int row, int col, bool from_gpu); + PsiPhi read_psi_phi(int time, int row, int col); // Setters for the utility functions to allocate the data. - void set_meta_data(int new_num_bytes, int new_num_times, int new_width, int new_height); + void set_meta_data(int new_num_bytes, int new_num_times, int new_height, int new_width); void set_psi_scaling(float min_val, float max_val, float scale_val); void set_phi_scaling(float min_val, float max_val, float scale_val); - // Array pointer functions needed for CUDA memory management. Should ONLY be called - // by those utility functions. All other access should be done through the getters above. + // Should ONLY be called by the utility functions. inline void* get_cpu_array_ptr() { return cpu_array_ptr; } - inline void* get_gpu_array_ptr() { return gpu_array_ptr; } inline void set_cpu_array_ptr(void* new_ptr) { cpu_array_ptr = new_ptr; } - inline void set_gpu_array_ptr(void* new_ptr) { gpu_array_ptr = new_ptr; } private: - inline int psi_index(int time, int row, int col) const { - return 2 * (pixels_per_image * time + row * width + col); - } - - inline int phi_index(int time, int row, int col) const { - return 2 * (pixels_per_image * time + row * width + col) + 1; - } - - // --- Attributes ----------------------------- - int num_times = 0; - int width = 0; - int height = 0; - int pixels_per_image = 0; - int num_entries = 0; - int block_size = 0; - long unsigned total_array_size = 0; + PsiPhiArrayMeta meta_data; - // Pointers the CPU and GPU array. + // Pointers the array (CPU space). void* cpu_array_ptr = nullptr; - void* gpu_array_ptr = nullptr; - - // Compression and scaling parameters of on GPU array. - int num_bytes = -1; // -1 (float), 1 (unit8) or 2 (unit16) - - float psi_min_val = FLT_MAX; - float psi_max_val = -FLT_MAX; - float psi_scale = 1.0; - - float phi_min_val = FLT_MAX; - float phi_max_val = -FLT_MAX; - float phi_scale = 1.0; }; -// read_encoded_psi_phi() is implemented in the header file so the CUDA files do not need -// to link against psi_phi_array.cpp. -inline PsiPhi PsiPhiArray::read_encoded_psi_phi(int time, int row, int col, bool from_gpu) { - void* array_ptr = (from_gpu) ? gpu_array_ptr : cpu_array_ptr; - assertm(array_ptr != nullptr, "Image data not allocated."); - - // Compute the in list index from the row, column, and time. - int start_index = psi_index(time, row, col); - assertm((start_index >= 0) && (start_index < num_entries), "Invalid index."); - - // Short circuit the typical case of float encoding. - // No scaling or shifting done. - PsiPhi result; - if (num_bytes == -1) { - result.psi = reinterpret_cast(array_ptr)[start_index]; - result.phi = reinterpret_cast(array_ptr)[start_index + 1]; - } else { - // Handle the compressed encodings. - float psi_value = (num_bytes == 1) ? (float)reinterpret_cast(array_ptr)[start_index] - : (float)reinterpret_cast(array_ptr)[start_index]; - result.psi = decode_uint_scalar(psi_value, psi_min_val, psi_scale); - - float phi_value = (num_bytes == 1) ? (float)reinterpret_cast(array_ptr)[start_index + 1] - : (float)reinterpret_cast(array_ptr)[start_index + 1]; - result.phi = decode_uint_scalar(phi_value, phi_min_val, phi_scale); - } - return result; -} - } /* namespace search */ #endif /* PSI_PHI_ARRAY_DS_ */ diff --git a/src/kbmod/search/psi_phi_array_utils.h b/src/kbmod/search/psi_phi_array_utils.h index e0b56217..c69db11e 100644 --- a/src/kbmod/search/psi_phi_array_utils.h +++ b/src/kbmod/search/psi_phi_array_utils.h @@ -4,7 +4,7 @@ * The utility functions for the psi/phi array. Broken out from the header * data structure so that it can use packages that won't be imported into the * CUDA kernel, such as Eigen. - * + * * Created on: Dec 8, 2023 */ @@ -23,11 +23,9 @@ namespace search { // Compute the min, max, and scale parameter from the a vector of image data. -std::array compute_scale_params_from_image_vect(const std::vector& imgs, int num_bytes); +std::array compute_scale_params_from_image_vect(const std::vector& imgs, int num_bytes); -void fill_psi_phi_array(PsiPhiArray& result_data, - int num_bytes, - const std::vector& psi_imgs, +void fill_psi_phi_array(PsiPhiArray& result_data, int num_bytes, const std::vector& psi_imgs, const std::vector& phi_imgs); } /* namespace search */ diff --git a/src/kbmod/search/stack_search.cpp b/src/kbmod/search/stack_search.cpp index a85c21b8..85f4f456 100644 --- a/src/kbmod/search/stack_search.cpp +++ b/src/kbmod/search/stack_search.cpp @@ -2,9 +2,9 @@ namespace search { #ifdef HAVE_CUDA -extern "C" void deviceSearchFilter(int num_images, int width, int height, float* psi_vect, float* phi_vect, - PerImageData img_data, SearchParameters params, int num_trajectories, - Trajectory* trj_to_search, int num_results, Trajectory* best_results); +extern "C" void deviceSearchFilter(PsiPhiArray& psi_phi_data, float* image_times, SearchParameters params, + int num_trajectories, Trajectory* trj_to_search, int num_results, + Trajectory* best_results); #endif StackSearch::StackSearch(ImageStack& imstack) : stack(imstack) { @@ -78,32 +78,15 @@ void StackSearch::search(int ang_steps, int vel_steps, float min_ang, float max_ prepare_psi_phi(); create_search_list(ang_steps, vel_steps, min_ang, max_ang, min_vel, mavx); + // Create a data stucture for the per-image data. + std::vector image_times = stack.build_zeroed_times(); + DebugTimer psi_phi_timer = DebugTimer("Creating psi/phi buffers", debug_info); prepare_psi_phi(); - std::vector psi_vect; - std::vector phi_vect; - fill_psi_phi(psi_images, phi_images, &psi_vect, &phi_vect); + PsiPhiArray psi_phi_data; + fill_psi_phi_array(psi_phi_data, params.psi_num_bytes, psi_images, phi_images); psi_phi_timer.stop(); - // Create a data stucture for the per-image data. - std::vector image_times = stack.build_zeroed_times(); - PerImageData img_data; - img_data.num_images = stack.img_count(); - img_data.image_times = image_times.data(); - - // Compute the encoding parameters for psi and phi if needed. - // Vectors need to be created outside the if so they stay in scope. - std::vector psi_scale_vect; - std::vector phi_scale_vect; - if (params.psi_num_bytes > 0) { - psi_scale_vect = compute_image_scaling(psi_images, params.psi_num_bytes); - img_data.psi_params = psi_scale_vect.data(); - } - if (params.phi_num_bytes > 0) { - phi_scale_vect = compute_image_scaling(phi_images, params.phi_num_bytes); - img_data.phi_params = phi_scale_vect.data(); - } - // Allocate a vector for the results. int num_search_pixels = ((params.x_start_max - params.x_start_min) * (params.y_start_max - params.y_start_min)); @@ -122,9 +105,8 @@ void StackSearch::search(int ang_steps, int vel_steps, float min_ang, float max_ // Do the actual search on the GPU. DebugTimer search_timer = DebugTimer("Running search", debug_info); #ifdef HAVE_CUDA - deviceSearchFilter(stack.img_count(), stack.get_width(), stack.get_height(), psi_vect.data(), - phi_vect.data(), img_data, params, search_list.size(), search_list.data(), max_results, - results.data()); + deviceSearchFilter(psi_phi_data, image_times.data(), params, search_list.size(), search_list.data(), + max_results, results.data()); #else throw std::runtime_error("Non-GPU search is not implemented."); #endif @@ -162,37 +144,6 @@ void StackSearch::prepare_psi_phi() { } } -std::vector StackSearch::compute_image_scaling(const std::vector& vect, - int encoding_bytes) const { - std::vector result; - DebugTimer timer = DebugTimer("Computing image scaling", debug_info); - - const int num_images = vect.size(); - for (int i = 0; i < num_images; ++i) { - scaleParameters params; - params.scale = 1.0; - - std::array bnds = vect[i].compute_bounds(); - params.min_val = bnds[0]; - params.max_val = bnds[1]; - - // Increase width to avoid divide by zero. - float width = (params.max_val - params.min_val); - if (width < 1e-6) width = 1e-6; - - // Set the scale if we are encoding the values. - if (encoding_bytes == 1 || encoding_bytes == 2) { - long int num_values = (1 << (8 * encoding_bytes)) - 1; - params.scale = width / (double)num_values; - } - - result.push_back(params); - } - - timer.stop(); - return result; -} - void StackSearch::save_images(const std::string& path) { for (int i = 0; i < stack.img_count(); ++i) { std::string number = std::to_string(i); @@ -230,39 +181,6 @@ void StackSearch::create_search_list(int angle_steps, int velocity_steps, float timer.stop(); } -void StackSearch::fill_psi_phi(const std::vector& psi_imgs, const std::vector& phi_imgs, - std::vector* psi_vect, std::vector* phi_vect) { - assert(psi_vect != NULL); - assert(phi_vect != NULL); - - int num_images = psi_imgs.size(); - assert(num_images > 0); - assert(phi_imgs.size() == num_images); - - int num_pixels = psi_imgs[0].get_npixels(); - for (int i = 0; i < num_images; ++i) { - assert(psi_imgs[i].get_npixels() == num_pixels); - assert(phi_imgs[i].get_npixels() == num_pixels); - } - - psi_vect->clear(); - psi_vect->reserve(num_images * num_pixels); - phi_vect->clear(); - phi_vect->reserve(num_images * num_pixels); - - for (int i = 0; i < num_images; ++i) { - // I don't understand why it's super important psi_imgs here MUST BE - // reshaped in RowMajor order explicitly - no other reshape required this - // TODO: Understand why this is different than masks or stamps f.e. - const auto& psi_ref = psi_imgs[i].get_image().reshaped(); - const auto& phi_ref = phi_imgs[i].get_image().reshaped(); - for (unsigned p = 0; p < num_pixels; ++p) { - psi_vect->push_back(psi_ref[p]); - phi_vect->push_back(phi_ref[p]); - } - } -} - Point StackSearch::get_trajectory_position(const Trajectory& t, int i) const { float time = stack.get_zeroed_time(i); return {t.x + time * t.vx, t.y + time * t.vy}; diff --git a/src/kbmod/search/stack_search.h b/src/kbmod/search/stack_search.h index 88e4586b..face71d6 100644 --- a/src/kbmod/search/stack_search.h +++ b/src/kbmod/search/stack_search.h @@ -16,6 +16,8 @@ #include "geom.h" #include "image_stack.h" #include "psf.h" +#include "psi_phi_array_ds.h" +#include "psi_phi_array_utils.h" #include "pydocs/stack_search_docs.h" #include "stamp_creator.h" @@ -78,14 +80,6 @@ class StackSearch { void sort_results(); std::vector create_curves(Trajectory t, const std::vector& imgs); - // Fill an interleaved vector for the GPU functions. - void fill_psi_phi(const std::vector& psi_imgs, const std::vector& phi_imgs, - std::vector* psi_vect, std::vector* phi_vect); - - // Set the parameter min/max/scale from the psi/phi/other images. - std::vector compute_image_scaling(const std::vector& vect, - int encoding_bytes) const; - // Creates list of trajectories to search. void create_search_list(int angle_steps, int velocity_steps, float min_ang, float max_ang, float min_vel, float max_vel); diff --git a/tests/test_psi_phi_array.py b/tests/test_psi_phi_array.py index 224f004f..373f2398 100644 --- a/tests/test_psi_phi_array.py +++ b/tests/test_psi_phi_array.py @@ -43,7 +43,7 @@ def test_set_meta_data(self): self.assertEqual(arr.total_array_size, 0) # Make float - arr.set_meta_data(-1, self.num_times, self.width, self.height) + arr.set_meta_data(-1, self.num_times, self.height, self.width) self.assertEqual(arr.num_bytes, -1) self.assertEqual(arr.block_size, 4) self.assertEqual(arr.num_times, self.num_times) @@ -54,7 +54,7 @@ def test_set_meta_data(self): self.assertEqual(arr.total_array_size, 4 * arr.num_entries) # Make uint8 - arr.set_meta_data(1, self.num_times, self.width, self.height) + arr.set_meta_data(1, self.num_times, self.height, self.width) self.assertEqual(arr.num_bytes, 1) self.assertEqual(arr.block_size, 1) self.assertEqual(arr.num_times, self.num_times) @@ -65,7 +65,7 @@ def test_set_meta_data(self): self.assertEqual(arr.total_array_size, 1 * arr.num_entries) # Make uint16 - arr.set_meta_data(2, self.num_times, self.width, self.height) + arr.set_meta_data(2, self.num_times, self.height, self.width) self.assertEqual(arr.num_bytes, 2) self.assertEqual(arr.block_size, 2) self.assertEqual(arr.num_times, self.num_times) @@ -139,23 +139,20 @@ def test_fill_psi_phi_array(self): self.assertEqual(arr.block_size, num_bytes) self.assertEqual(arr.total_array_size, arr.num_entries * arr.block_size) - # Check that we can read the values from the CPU array. - self.assertTrue(arr.cpu_array_allocated) - self.assertTrue(arr.gpu_array_allocated) # Check that we can correctly read the values from the CPU. + self.assertTrue(arr.cpu_array_allocated) for time in range(self.num_times): offset = time * self.width * self.height for row in range(self.height): for col in range(self.width): - val = arr.read_encoded_psi_phi(time, row, col, False) + val = arr.read_psi_phi(time, row, col) self.assertAlmostEqual(val.psi, offset + row * self.width + col, delta=0.05) self.assertAlmostEqual(val.phi, 0.1 * (time + 1), delta=1e-5) # Check that the arrays are set to NULL after we clear it (memory should be freed too). arr.clear() self.assertFalse(arr.cpu_array_allocated) - self.assertFalse(arr.gpu_array_allocated) if __name__ == "__main__": From 04c9ae6a489fca19ecc2b5998bd6577333a25747 Mon Sep 17 00:00:00 2001 From: Jeremy Kubica <104161096+jeremykubica@users.noreply.github.com> Date: Mon, 11 Dec 2023 13:58:23 -0500 Subject: [PATCH 6/7] Update --- src/kbmod/search/kernels.cu | 39 +++-- src/kbmod/search/psi_phi_array.cpp | 67 ++++++--- src/kbmod/search/psi_phi_array_ds.h | 7 +- src/kbmod/search/pydocs/psi_phi_array_docs.h | 144 +++++++++++++++++++ 4 files changed, 219 insertions(+), 38 deletions(-) create mode 100644 src/kbmod/search/pydocs/psi_phi_array_docs.h diff --git a/src/kbmod/search/kernels.cu b/src/kbmod/search/kernels.cu index 157ae377..c38c7041 100644 --- a/src/kbmod/search/kernels.cu +++ b/src/kbmod/search/kernels.cu @@ -23,6 +23,28 @@ namespace search { +extern "C" void device_allocate_psi_phi_array(PsiPhiArray* data) { + if (!data->cpu_array_allocated()) + throw std::runtime_error("CPU data is not allocated."); + if (data->gpu_array_allocated()) + throw std::runtime_error("GPU data is already allocated."); + + void* device_array_ptr; + checkCudaErrors(cudaMalloc((void **)&device_array_ptr, data->get_total_array_size())); + checkCudaErrors(cudaMemcpy(device_array_ptr, + data->get_cpu_array_ptr(), + data->get_total_array_size(), + cudaMemcpyHostToDevice)); + data->set_gpu_array_ptr(device_array_ptr); +} + +extern "C" void device_free_psi_phi_array(PsiPhiArray* data) { + if (data->gpu_array_allocated()) { + checkCudaErrors(cudaFree(data->get_gpu_array_ptr())); + data->set_gpu_array_ptr(nullptr); + } +} + __forceinline__ __device__ PsiPhi read_encoded_psi_phi(PsiPhiArrayMeta ¶ms, void *psi_phi_vect, int time, int row, int col) { // Bounds checking. @@ -245,7 +267,6 @@ extern "C" void deviceSearchFilter(PsiPhiArray &psi_phi_array, float *image_time // Allocate Device memory Trajectory *device_tests; float *device_img_times; - void *device_psi_phi; Trajectory *device_search_results; // Check the hard coded maximum number of images against the num_images. @@ -254,18 +275,9 @@ extern "C" void deviceSearchFilter(PsiPhiArray &psi_phi_array, float *image_time throw std::runtime_error("Number of images exceeds GPU maximum."); } - // Allocate and copy the device_psi_phi_vector. - if (psi_phi_array.cpu_array_allocated() == false) { + // Check that the device psi_phi vector has been allocated. + if (psi_phi_array.gpu_array_allocated() == false) { throw std::runtime_error("PsiPhi data has not been created."); - } - if (params.debug) { - printf("Allocating %lu bytes for GPU psi/phi data (%i images, %i pixels per image).\n", - psi_phi_array.get_total_array_size(), psi_phi_array.get_num_times(), - psi_phi_array.get_pixels_per_image()); - } - checkCudaErrors(cudaMalloc((void **)&device_psi_phi, psi_phi_array.get_total_array_size())); - checkCudaErrors(cudaMemcpy(device_psi_phi, psi_phi_array.get_cpu_array_ptr(), - psi_phi_array.get_total_array_size(), cudaMemcpyHostToDevice)); // Copy trajectories to search if (params.debug) { @@ -299,7 +311,7 @@ extern "C" void deviceSearchFilter(PsiPhiArray &psi_phi_array, float *image_time dim3 threads(THREAD_DIM_X, THREAD_DIM_Y); // Launch Search - searchFilterImages<<>>(psi_phi_array.get_meta_data(), device_psi_phi, device_img_times, + searchFilterImages<<>>(psi_phi_array.get_meta_data(), psi_phi_array.get_gpu_array_ptr() device_img_times, params, num_trajectories, device_tests, device_search_results); // Read back results @@ -309,7 +321,6 @@ extern "C" void deviceSearchFilter(PsiPhiArray &psi_phi_array, float *image_time // Free the on GPU memory. checkCudaErrors(cudaFree(device_search_results)); checkCudaErrors(cudaFree(device_img_times)); - checkCudaErrors(cudaFree(device_psi_phi)); checkCudaErrors(cudaFree(device_tests)); } diff --git a/src/kbmod/search/psi_phi_array.cpp b/src/kbmod/search/psi_phi_array.cpp index e119dc05..bb0337e1 100644 --- a/src/kbmod/search/psi_phi_array.cpp +++ b/src/kbmod/search/psi_phi_array.cpp @@ -1,8 +1,16 @@ #include "psi_phi_array_ds.h" #include "psi_phi_array_utils.h" +#include "pydocs/psi_phi_array_docs.h" namespace search { +// Declaration of CUDA functions that will be linked in. +#ifdef HAVE_CUDA +extern "C" void device_allocate_psi_phi_array(PsiPhiArray* data); + +extern "C" void device_free_psi_phi_array(PsiPhiArray* data); +#endif + // ------------------------------------------------------- // --- Implementation of core data structure functions --- // ------------------------------------------------------- @@ -13,6 +21,11 @@ PsiPhiArray::~PsiPhiArray() { if (cpu_array_ptr != nullptr) { free(cpu_array_ptr); } +#ifdef HAVE_CUDA + if (gpu_array_ptr != nullptr) { + device_free_psi_phi_array(this); + } +#endif } void PsiPhiArray::clear() { @@ -21,6 +34,12 @@ void PsiPhiArray::clear() { free(cpu_array_ptr); cpu_array_ptr = nullptr; } +#ifdef HAVE_CUDA + if (gpu_array_ptr != nullptr) { + device_free_psi_phi_array(this); + gpu_array_ptr = nullptr; + } +#endif // Reset the meta data except the encoding information. meta_data.num_times = 0; @@ -244,6 +263,11 @@ void fill_psi_phi_array(PsiPhiArray& result_data, int num_bytes, const std::vect // Just interleave psi and phi images. set_float_cpu_psi_phi_array(result_data, psi_imgs, phi_imgs); } + +#ifdef HAVE_CUDA + // Create a copy of the encoded data in GPU memory. + device_allocate_psi_phi_array(&result_data); +#endif } // ------------------------------------------- @@ -254,35 +278,36 @@ void fill_psi_phi_array(PsiPhiArray& result_data, int num_bytes, const std::vect static void psi_phi_array_binding(py::module& m) { using ppa = search::PsiPhiArray; - py::class_(m, "PsiPhi") + py::class_(m, "PsiPhi", pydocs::DOC_PsiPhi)) .def(py::init<>()) .def_readwrite("psi", &search::PsiPhi::psi) .def_readwrite("phi", &search::PsiPhi::phi); - py::class_(m, "PsiPhiArray") + py::class_(m, "PsiPhiArray", DOC_PsiPhiArray) .def(py::init<>()) - .def_property_readonly("num_bytes", &ppa::get_num_bytes) - .def_property_readonly("num_times", &ppa::get_num_times) - .def_property_readonly("width", &ppa::get_width) - .def_property_readonly("height", &ppa::get_height) - .def_property_readonly("pixels_per_image", &ppa::get_pixels_per_image) - .def_property_readonly("num_entries", &ppa::get_num_entries) - .def_property_readonly("total_array_size", &ppa::get_total_array_size) - .def_property_readonly("block_size", &ppa::get_block_size) - .def_property_readonly("psi_min_val", &ppa::get_psi_min_val) - .def_property_readonly("psi_max_val", &ppa::get_psi_max_val) - .def_property_readonly("psi_scale", &ppa::get_psi_scale) - .def_property_readonly("phi_min_val", &ppa::get_phi_min_val) - .def_property_readonly("phi_max_val", &ppa::get_phi_max_val) - .def_property_readonly("phi_scale", &ppa::get_phi_scale) - .def_property_readonly("cpu_array_allocated", &ppa::cpu_array_allocated) - .def("set_meta_data", &ppa::set_meta_data) - .def("clear", &ppa::clear) - .def("read_psi_phi", &ppa::read_psi_phi); + .def_property_readonly("num_bytes", &ppa::get_num_bytes, DOC_PsiPhiArray_get_num_bytes) + .def_property_readonly("num_times", &ppa::get_num_times, DOC_PsiPhiArray_get_num_times) + .def_property_readonly("width", &ppa::get_width, DOC_PsiPhiArray_get_width) + .def_property_readonly("height", &ppa::get_height, DOC_PsiPhiArray_get_height) + .def_property_readonly("pixels_per_image", &ppa::get_pixels_per_image, DOC_PsiPhiArray_get_pixels_per_image) + .def_property_readonly("num_entries", &ppa::get_num_entries, DOC_PsiPhiArray_get_num_entries) + .def_property_readonly("total_array_size", &ppa::get_total_array_size, DOC_PsiPhiArray_get_total_array_size) + .def_property_readonly("block_size", &ppa::get_block_size, DOC_PsiPhiArray_get_block_size) + .def_property_readonly("psi_min_val", &ppa::get_psi_min_val, DOC_PsiPhiArray_get_psi_min_val) + .def_property_readonly("psi_max_val", &ppa::get_psi_max_val, DOC_PsiPhiArray_get_psi_max_val) + .def_property_readonly("psi_scale", &ppa::get_psi_scale, DOC_PsiPhiArray_get_psi_scale) + .def_property_readonly("phi_min_val", &ppa::get_phi_min_val, DOC_PsiPhiArray_get_phi_min_val) + .def_property_readonly("phi_max_val", &ppa::get_phi_max_val, DOC_PsiPhiArray_get_phi_max_val) + .def_property_readonly("phi_scale", &ppa::get_phi_scale, DOC_PsiPhiArray_get_phi_scale) + .def_property_readonly("cpu_array_allocated", &ppa::cpu_array_allocated, DOC_PsiPhiArray_get_cpu_array_allocated) + .def_property_readonly("gpu_array_allocated", &ppa::gpu_array_allocated, DOC_PsiPhiArray_get_gpu_array_allocated) + .def("set_meta_data", &ppa::set_meta_data, DOC_PsiPhiArray_set_meta_data) + .def("clear", &ppa::clear, DOC_PsiPhiArray_clear) + .def("read_psi_phi", &ppa::read_psi_phi, DOC_PsiPhiArray_read_psi_phi); m.def("compute_scale_params_from_image_vect", &search::compute_scale_params_from_image_vect); m.def("decode_uint_scalar", &search::decode_uint_scalar); m.def("encode_uint_scalar", &search::encode_uint_scalar); - m.def("fill_psi_phi_array", &search::fill_psi_phi_array); + m.def("fill_psi_phi_array", &search::fill_psi_phi_array, DOC_PsiPhiArray_fill_psi_phi_array); } #endif diff --git a/src/kbmod/search/psi_phi_array_ds.h b/src/kbmod/search/psi_phi_array_ds.h index 15b573d5..2e49d91d 100644 --- a/src/kbmod/search/psi_phi_array_ds.h +++ b/src/kbmod/search/psi_phi_array_ds.h @@ -46,9 +46,6 @@ struct PsiPhiArrayMeta { int block_size = 0; long unsigned total_array_size = 0; - // Pointers the array (CPU space) - void* cpu_array_ptr = nullptr; - // Compression and scaling parameters of on GPU array. int num_bytes = -1; // -1 (float), 1 (unit8) or 2 (unit16) @@ -91,6 +88,7 @@ class PsiPhiArray { inline float get_phi_scale() { return meta_data.phi_scale; } inline bool cpu_array_allocated() { return cpu_array_ptr != nullptr; } + inline bool gpu_array_allocated() { return gpu_array_ptr != nullptr; } // Primary getter function for interaction (read the data). PsiPhi read_psi_phi(int time, int row, int col); @@ -102,13 +100,16 @@ class PsiPhiArray { // Should ONLY be called by the utility functions. inline void* get_cpu_array_ptr() { return cpu_array_ptr; } + inline void* get_gpu_array_ptr() { return gpu_array_ptr; } inline void set_cpu_array_ptr(void* new_ptr) { cpu_array_ptr = new_ptr; } + inline void set_gpu_array_ptr(void* new_ptr) { gpu_array_ptr = new_ptr; } private: PsiPhiArrayMeta meta_data; // Pointers the array (CPU space). void* cpu_array_ptr = nullptr; + void* gpu_array_ptr = nullptr; }; } /* namespace search */ diff --git a/src/kbmod/search/pydocs/psi_phi_array_docs.h b/src/kbmod/search/pydocs/psi_phi_array_docs.h new file mode 100644 index 00000000..59c914bf --- /dev/null +++ b/src/kbmod/search/pydocs/psi_phi_array_docs.h @@ -0,0 +1,144 @@ +#ifndef PSI_PHI_ARRAY_DOCS +#define PSI_PHI_ARRAY_DOCS + +namespace pydocs { + +static const auto DOC_PsiPhi = R"doc( + A named tuple for psi and phi values. + + Attributes + ---------- + psi : `float` + The psi value at a pixel. + phi : `float` + The phi value at a pixel. + )doc"; + + static const auto DOC_PsiPhiArray = R"doc( + An encoded array of Psi and Phi values along with their meta data. + )doc"; + +static const auto DOC_PsiPhiArray_get_num_bytes = R"doc( + The setting for encoding (-1 for float, 1 for uint8, and 2 for uint16) + )doc"; + +static const auto DOC_PsiPhiArray_get_num_times = R"doc( + The number of times. + )doc"; + +static const auto DOC_PsiPhiArray_get_width = R"doc( + The image width. + )doc"; + +static const auto DOC_PsiPhiArray_get_height = R"doc( + The image height. + )doc"; + +static const auto DOC_PsiPhiArray_get_pixels_per_image = R"doc( + The number of pixels per each image. + )doc"; + +static const auto DOC_PsiPhiArray_get_num_entries = R"doc( + The number of array entries. + )doc"; + +static const auto DOC_PsiPhiArray_get_total_array_size = R"doc( + The size of the array in bytes. + )doc"; + +static const auto DOC_PsiPhiArray_get_block_size = R"doc( + The size of a single entry in bytes. + )doc"; + +static const auto DOC_PsiPhiArray_get_psi_min_val = R"doc( + The minimum value of psi used in the scaling computations. + )doc"; + +static const auto DOC_PsiPhiArray_get_psi_max_val = R"doc( + The maximum value of psi used in the scaling computations. + )doc"; + +static const auto DOC_PsiPhiArray_get_psi_scale = R"doc( + The scaling parameter for psi. + )doc"; + +static const auto DOC_PsiPhiArray_get_phi_min_val = R"doc( + The minimum value of phi used in the scaling computations. + )doc"; + +static const auto DOC_PsiPhiArray_get_phi_max_val = R"doc( + The maximum value of phi used in the scaling computations. + )doc"; + +static const auto DOC_PsiPhiArray_get_phi_scale = R"doc( + The scaling parameter for phi. + )doc"; + +static const auto DOC_PsiPhiArray_get_phi_scale = R"doc( + The scaling parameter for phi. + )doc"; + +static const auto DOC_PsiPhiArray_get_cpu_array_allocated = R"doc( + A Boolean indicating whether the cpu array exists. + )doc"; + +static const auto DOC_PsiPhiArray_get_gpu_array_allocated = R"doc( + A Boolean indicating whether the gpu array exists. + )doc"; + +static const auto DOC_PsiPhiArray_clear = R"doc( + Clear all data and free the arrays. + )doc"; + +static const auto DOC_PsiPhiArray_read_psi_phi = R"doc( + Read a PsiPhi value from the CPU array. + + Parameters + ---------- + time : `int` + The timestep to read. + row : `int` + The row in the image (y-dimension) + col : `int` + The column in the image (x-dimension) + + Returns + ------- + `PsiPhi` + The pixel values. + )doc"; + +static const auto DOC_PsiPhiArray_set_meta_data = R"doc( + Set the meta data for the array. Automatically called by + fill_psi_phi_array(). + + Parameters + ---------- + new_num_bytes : `int` + The type of encoding to use (-1, 1, or 2). + new_num_times : `int` + The number of time steps in the data. + new_height : `int` + The height of each image in pixels. + new_width : `int` + The width of each image in pixels. + )doc"; + +static const auto DOC_PsiPhiArray_fill_psi_phi_array = R"doc( + Fill the PsiPhiArray from Psi and Phi images. + + Parameters + ---------- + result_data : `PsiPhiArray` + The location to store the data. + num_bytes : `int` + The type of encoding to use (-1, 1, or 2). + psi_imgs : `list` + A list of psi images. + phi_imgs : `list` + A list of phi images. + )doc"; + +} // namespace pydocs + +#endif /* PSI_PHI_ARRAY_DOCS */ From 3e20b7f6afc1daf26072d002e1a11432709ca7dc Mon Sep 17 00:00:00 2001 From: Jeremy Kubica <104161096+jeremykubica@users.noreply.github.com> Date: Mon, 11 Dec 2023 15:45:29 -0500 Subject: [PATCH 7/7] Various fixes --- src/kbmod/search/kernels.cu | 5 +- src/kbmod/search/psi_phi_array.cpp | 61 +++++++++++--------- src/kbmod/search/psi_phi_array_ds.h | 10 +++- src/kbmod/search/pydocs/psi_phi_array_docs.h | 15 ++--- tests/test_psi_phi_array.py | 13 ++--- 5 files changed, 58 insertions(+), 46 deletions(-) diff --git a/src/kbmod/search/kernels.cu b/src/kbmod/search/kernels.cu index c38c7041..ea477a7f 100644 --- a/src/kbmod/search/kernels.cu +++ b/src/kbmod/search/kernels.cu @@ -54,7 +54,7 @@ __forceinline__ __device__ PsiPhi read_encoded_psi_phi(PsiPhiArrayMeta ¶ms, // Compute the in-list index from the row, column, and time. int start_index = 2 * (params.pixels_per_image * time + row * params.width + col); - if (params.num_bytes == -1) { + if (params.num_bytes == 4) { // Short circuit the typical case of float encoding. No scaling or shifting done. return {reinterpret_cast(psi_phi_vect)[start_index], reinterpret_cast(psi_phi_vect)[start_index + 1]}; @@ -278,6 +278,7 @@ extern "C" void deviceSearchFilter(PsiPhiArray &psi_phi_array, float *image_time // Check that the device psi_phi vector has been allocated. if (psi_phi_array.gpu_array_allocated() == false) { throw std::runtime_error("PsiPhi data has not been created."); + } // Copy trajectories to search if (params.debug) { @@ -311,7 +312,7 @@ extern "C" void deviceSearchFilter(PsiPhiArray &psi_phi_array, float *image_time dim3 threads(THREAD_DIM_X, THREAD_DIM_Y); // Launch Search - searchFilterImages<<>>(psi_phi_array.get_meta_data(), psi_phi_array.get_gpu_array_ptr() device_img_times, + searchFilterImages<<>>(psi_phi_array.get_meta_data(), psi_phi_array.get_gpu_array_ptr(), device_img_times, params, num_trajectories, device_tests, device_search_results); // Read back results diff --git a/src/kbmod/search/psi_phi_array.cpp b/src/kbmod/search/psi_phi_array.cpp index bb0337e1..6b99e652 100644 --- a/src/kbmod/search/psi_phi_array.cpp +++ b/src/kbmod/search/psi_phi_array.cpp @@ -25,7 +25,7 @@ PsiPhiArray::~PsiPhiArray() { if (gpu_array_ptr != nullptr) { device_free_psi_phi_array(this); } -#endif +#endif } void PsiPhiArray::clear() { @@ -61,7 +61,7 @@ void PsiPhiArray::clear() { void PsiPhiArray::set_meta_data(int new_num_bytes, int new_num_times, int new_height, int new_width) { // Validity checking of parameters. if (new_num_bytes != -1 && new_num_bytes != 1 && new_num_bytes != 2 && new_num_bytes != 4) { - throw std::runtime_error("Invalid setting of num_bytes. Must be (-1, 1, 2, or 4)."); + throw std::runtime_error("Invalid setting of num_bytes. Must be (-1 [use default], 1, 2, or 4)."); } if (new_num_times <= 0) throw std::runtime_error("Invalid num_times passed to set_meta_data.\n"); if (new_width <= 0) throw std::runtime_error("Invalid width passed to set_meta_data.\n"); @@ -78,7 +78,7 @@ void PsiPhiArray::set_meta_data(int new_num_bytes, int new_num_times, int new_he } else if (meta_data.num_bytes == 2) { meta_data.block_size = sizeof(uint16_t); } else { - meta_data.num_bytes = -1; + meta_data.num_bytes = 4; meta_data.block_size = sizeof(float); } @@ -118,7 +118,7 @@ PsiPhi PsiPhiArray::read_psi_phi(int time, int row, int col) { // Compute the in-list index from the row, column, and time. int start_index = 2 * (meta_data.pixels_per_image * time + row * meta_data.width + col); - if (meta_data.num_bytes == -1) { + if (meta_data.num_bytes == 4) { // Short circuit the typical case of float encoding. // No scaling or shifting done. result.psi = reinterpret_cast(cpu_array_ptr)[start_index]; @@ -278,36 +278,45 @@ void fill_psi_phi_array(PsiPhiArray& result_data, int num_bytes, const std::vect static void psi_phi_array_binding(py::module& m) { using ppa = search::PsiPhiArray; - py::class_(m, "PsiPhi", pydocs::DOC_PsiPhi)) + py::class_(m, "PsiPhi", pydocs::DOC_PsiPhi) .def(py::init<>()) .def_readwrite("psi", &search::PsiPhi::psi) .def_readwrite("phi", &search::PsiPhi::phi); - py::class_(m, "PsiPhiArray", DOC_PsiPhiArray) + py::class_(m, "PsiPhiArray", pydocs::DOC_PsiPhiArray) .def(py::init<>()) - .def_property_readonly("num_bytes", &ppa::get_num_bytes, DOC_PsiPhiArray_get_num_bytes) - .def_property_readonly("num_times", &ppa::get_num_times, DOC_PsiPhiArray_get_num_times) - .def_property_readonly("width", &ppa::get_width, DOC_PsiPhiArray_get_width) - .def_property_readonly("height", &ppa::get_height, DOC_PsiPhiArray_get_height) - .def_property_readonly("pixels_per_image", &ppa::get_pixels_per_image, DOC_PsiPhiArray_get_pixels_per_image) - .def_property_readonly("num_entries", &ppa::get_num_entries, DOC_PsiPhiArray_get_num_entries) - .def_property_readonly("total_array_size", &ppa::get_total_array_size, DOC_PsiPhiArray_get_total_array_size) - .def_property_readonly("block_size", &ppa::get_block_size, DOC_PsiPhiArray_get_block_size) - .def_property_readonly("psi_min_val", &ppa::get_psi_min_val, DOC_PsiPhiArray_get_psi_min_val) - .def_property_readonly("psi_max_val", &ppa::get_psi_max_val, DOC_PsiPhiArray_get_psi_max_val) - .def_property_readonly("psi_scale", &ppa::get_psi_scale, DOC_PsiPhiArray_get_psi_scale) - .def_property_readonly("phi_min_val", &ppa::get_phi_min_val, DOC_PsiPhiArray_get_phi_min_val) - .def_property_readonly("phi_max_val", &ppa::get_phi_max_val, DOC_PsiPhiArray_get_phi_max_val) - .def_property_readonly("phi_scale", &ppa::get_phi_scale, DOC_PsiPhiArray_get_phi_scale) - .def_property_readonly("cpu_array_allocated", &ppa::cpu_array_allocated, DOC_PsiPhiArray_get_cpu_array_allocated) - .def_property_readonly("gpu_array_allocated", &ppa::gpu_array_allocated, DOC_PsiPhiArray_get_gpu_array_allocated) - .def("set_meta_data", &ppa::set_meta_data, DOC_PsiPhiArray_set_meta_data) - .def("clear", &ppa::clear, DOC_PsiPhiArray_clear) - .def("read_psi_phi", &ppa::read_psi_phi, DOC_PsiPhiArray_read_psi_phi); + .def_property_readonly("num_bytes", &ppa::get_num_bytes, pydocs::DOC_PsiPhiArray_get_num_bytes) + .def_property_readonly("num_times", &ppa::get_num_times, pydocs::DOC_PsiPhiArray_get_num_times) + .def_property_readonly("width", &ppa::get_width, pydocs::DOC_PsiPhiArray_get_width) + .def_property_readonly("height", &ppa::get_height, pydocs::DOC_PsiPhiArray_get_height) + .def_property_readonly("pixels_per_image", &ppa::get_pixels_per_image, + pydocs::DOC_PsiPhiArray_get_pixels_per_image) + .def_property_readonly("num_entries", &ppa::get_num_entries, + pydocs::DOC_PsiPhiArray_get_num_entries) + .def_property_readonly("total_array_size", &ppa::get_total_array_size, + pydocs::DOC_PsiPhiArray_get_total_array_size) + .def_property_readonly("block_size", &ppa::get_block_size, pydocs::DOC_PsiPhiArray_get_block_size) + .def_property_readonly("psi_min_val", &ppa::get_psi_min_val, + pydocs::DOC_PsiPhiArray_get_psi_min_val) + .def_property_readonly("psi_max_val", &ppa::get_psi_max_val, + pydocs::DOC_PsiPhiArray_get_psi_max_val) + .def_property_readonly("psi_scale", &ppa::get_psi_scale, pydocs::DOC_PsiPhiArray_get_psi_scale) + .def_property_readonly("phi_min_val", &ppa::get_phi_min_val, + pydocs::DOC_PsiPhiArray_get_phi_min_val) + .def_property_readonly("phi_max_val", &ppa::get_phi_max_val, + pydocs::DOC_PsiPhiArray_get_phi_max_val) + .def_property_readonly("phi_scale", &ppa::get_phi_scale, pydocs::DOC_PsiPhiArray_get_phi_scale) + .def_property_readonly("cpu_array_allocated", &ppa::cpu_array_allocated, + pydocs::DOC_PsiPhiArray_get_cpu_array_allocated) + .def_property_readonly("gpu_array_allocated", &ppa::gpu_array_allocated, + pydocs::DOC_PsiPhiArray_get_gpu_array_allocated) + .def("set_meta_data", &ppa::set_meta_data, pydocs::DOC_PsiPhiArray_set_meta_data) + .def("clear", &ppa::clear, pydocs::DOC_PsiPhiArray_clear) + .def("read_psi_phi", &ppa::read_psi_phi, pydocs::DOC_PsiPhiArray_read_psi_phi); m.def("compute_scale_params_from_image_vect", &search::compute_scale_params_from_image_vect); m.def("decode_uint_scalar", &search::decode_uint_scalar); m.def("encode_uint_scalar", &search::encode_uint_scalar); - m.def("fill_psi_phi_array", &search::fill_psi_phi_array, DOC_PsiPhiArray_fill_psi_phi_array); + m.def("fill_psi_phi_array", &search::fill_psi_phi_array, pydocs::DOC_PsiPhiArray_fill_psi_phi_array); } #endif diff --git a/src/kbmod/search/psi_phi_array_ds.h b/src/kbmod/search/psi_phi_array_ds.h index 2e49d91d..4643dd33 100644 --- a/src/kbmod/search/psi_phi_array_ds.h +++ b/src/kbmod/search/psi_phi_array_ds.h @@ -6,6 +6,12 @@ * from the rest of the utility functions) to allow the CUDA files to import * only what they need. * + * The data structure allocates memory on both the CPU and GPU for the + * interleaved psi/phi array and maintains ownership of the pointers + * until clear() is called or the PsiPhiArray's destructor is called. This allows + * the object to be passed repeatedly to the on-device search without reallocating + * and copying the memory on the GPU. + * * Created on: Dec 5, 2023 */ @@ -43,11 +49,11 @@ struct PsiPhiArrayMeta { int height = 0; int pixels_per_image = 0; int num_entries = 0; - int block_size = 0; + int block_size = 0; // Actual memory used per entry. long unsigned total_array_size = 0; // Compression and scaling parameters of on GPU array. - int num_bytes = -1; // -1 (float), 1 (unit8) or 2 (unit16) + int num_bytes = 4; // 1 (unit8), 2 (unit16), or 4 (float) float psi_min_val = FLT_MAX; float psi_max_val = -FLT_MAX; diff --git a/src/kbmod/search/pydocs/psi_phi_array_docs.h b/src/kbmod/search/pydocs/psi_phi_array_docs.h index 59c914bf..ffb7b009 100644 --- a/src/kbmod/search/pydocs/psi_phi_array_docs.h +++ b/src/kbmod/search/pydocs/psi_phi_array_docs.h @@ -13,13 +13,14 @@ static const auto DOC_PsiPhi = R"doc( phi : `float` The phi value at a pixel. )doc"; - - static const auto DOC_PsiPhiArray = R"doc( + +static const auto DOC_PsiPhiArray = R"doc( An encoded array of Psi and Phi values along with their meta data. )doc"; static const auto DOC_PsiPhiArray_get_num_bytes = R"doc( - The setting for encoding (-1 for float, 1 for uint8, and 2 for uint16) + The target number of bytes to use for encoding the data (1 for uint8, 2 for uint16, + or 4 for float32). Might differ from actual number of bytes (block_size). )doc"; static const auto DOC_PsiPhiArray_get_num_times = R"doc( @@ -74,10 +75,6 @@ static const auto DOC_PsiPhiArray_get_phi_scale = R"doc( The scaling parameter for phi. )doc"; -static const auto DOC_PsiPhiArray_get_phi_scale = R"doc( - The scaling parameter for phi. - )doc"; - static const auto DOC_PsiPhiArray_get_cpu_array_allocated = R"doc( A Boolean indicating whether the cpu array exists. )doc"; @@ -115,7 +112,7 @@ static const auto DOC_PsiPhiArray_set_meta_data = R"doc( Parameters ---------- new_num_bytes : `int` - The type of encoding to use (-1, 1, or 2). + The type of encoding to use (1, 2, or 4). new_num_times : `int` The number of time steps in the data. new_height : `int` @@ -132,7 +129,7 @@ static const auto DOC_PsiPhiArray_fill_psi_phi_array = R"doc( result_data : `PsiPhiArray` The location to store the data. num_bytes : `int` - The type of encoding to use (-1, 1, or 2). + The type of encoding to use (1, 2, or 4). psi_imgs : `list` A list of psi images. phi_imgs : `list` diff --git a/tests/test_psi_phi_array.py b/tests/test_psi_phi_array.py index 373f2398..a5abb7aa 100644 --- a/tests/test_psi_phi_array.py +++ b/tests/test_psi_phi_array.py @@ -34,7 +34,7 @@ def setUp(self): def test_set_meta_data(self): arr = PsiPhiArray() self.assertEqual(arr.num_times, 0) - self.assertEqual(arr.num_bytes, -1) + self.assertEqual(arr.num_bytes, 4) self.assertEqual(arr.width, 0) self.assertEqual(arr.height, 0) self.assertEqual(arr.pixels_per_image, 0) @@ -43,8 +43,8 @@ def test_set_meta_data(self): self.assertEqual(arr.total_array_size, 0) # Make float - arr.set_meta_data(-1, self.num_times, self.height, self.width) - self.assertEqual(arr.num_bytes, -1) + arr.set_meta_data(4, self.num_times, self.height, self.width) + self.assertEqual(arr.num_bytes, 4) self.assertEqual(arr.block_size, 4) self.assertEqual(arr.num_times, self.num_times) self.assertEqual(arr.width, self.width) @@ -104,7 +104,7 @@ def test_compute_scale_params_from_image_vect(self): max_val = 2 * self.width * self.height - 1 # Parameters for encoding to a float - result_float = compute_scale_params_from_image_vect([self.psi_1, self.psi_2], -1) + result_float = compute_scale_params_from_image_vect([self.psi_1, self.psi_2], 4) self.assertAlmostEqual(result_float[0], 0.0, delta=1e-5) self.assertAlmostEqual(result_float[1], max_val, delta=1e-5) self.assertAlmostEqual(result_float[2], 1.0, delta=1e-5) @@ -122,7 +122,7 @@ def test_compute_scale_params_from_image_vect(self): self.assertAlmostEqual(result_uint16[2], max_val / 65535.0, delta=1e-5) def test_fill_psi_phi_array(self): - for num_bytes in [-1, 2]: + for num_bytes in [2, 4]: arr = PsiPhiArray() fill_psi_phi_array(arr, num_bytes, [self.psi_1, self.psi_2], [self.phi_1, self.phi_2]) @@ -133,13 +133,12 @@ def test_fill_psi_phi_array(self): self.assertEqual(arr.height, self.height) self.assertEqual(arr.pixels_per_image, self.width * self.height) self.assertEqual(arr.num_entries, 2 * arr.pixels_per_image * self.num_times) - if num_bytes == -1: + if num_bytes == 4: self.assertEqual(arr.block_size, 4) else: self.assertEqual(arr.block_size, num_bytes) self.assertEqual(arr.total_array_size, arr.num_entries * arr.block_size) - # Check that we can correctly read the values from the CPU. self.assertTrue(arr.cpu_array_allocated) for time in range(self.num_times):