Skip to content

Commit

Permalink
Merge pull request #403 from dirac-institute/psi_phi_array
Browse files Browse the repository at this point in the history
Redesign how KBMOD encodes data for on GPU use
  • Loading branch information
jeremykubica authored Dec 13, 2023
2 parents 62b6cda + a94dbcf commit 2c7c37b
Show file tree
Hide file tree
Showing 9 changed files with 885 additions and 265 deletions.
2 changes: 2 additions & 0 deletions src/kbmod/search/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ namespace py = pybind11;
#include "stack_search.cpp"
#include "stamp_creator.cpp"
#include "filtering.cpp"
#include "psi_phi_array.cpp"

PYBIND11_MODULE(search, m) {
m.attr("KB_NO_DATA") = pybind11::float_(search::NO_DATA);
Expand All @@ -39,6 +40,7 @@ PYBIND11_MODULE(search, m) {
search::pixel_pos_bindings(m);
search::image_moments_bindings(m);
search::stamp_parameters_bindings(m);
search::psi_phi_array_binding(m);
// Functions from raw_image.cpp
m.def("create_median_image", &search::create_median_image);
m.def("create_summed_image", &search::create_summed_image);
Expand Down
258 changes: 93 additions & 165 deletions src/kbmod/search/kernels.cu

Large diffs are not rendered by default.

323 changes: 323 additions & 0 deletions src/kbmod/search/psi_phi_array.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,323 @@
#include "psi_phi_array_ds.h"
#include "psi_phi_array_utils.h"
#include "pydocs/psi_phi_array_docs.h"

namespace search {

// Declaration of CUDA functions that will be linked in.
#ifdef HAVE_CUDA
extern "C" void device_allocate_psi_phi_array(PsiPhiArray* data);

extern "C" void device_free_psi_phi_array(PsiPhiArray* data);
#endif

// -------------------------------------------------------
// --- Implementation of core data structure functions ---
// -------------------------------------------------------

PsiPhiArray::PsiPhiArray() {}

PsiPhiArray::~PsiPhiArray() {
if (cpu_array_ptr != nullptr) {
free(cpu_array_ptr);
}
#ifdef HAVE_CUDA
if (gpu_array_ptr != nullptr) {
device_free_psi_phi_array(this);
}
#endif
}

void PsiPhiArray::clear() {
// Free all used memory on CPU and GPU.
if (cpu_array_ptr != nullptr) {
free(cpu_array_ptr);
cpu_array_ptr = nullptr;
}
#ifdef HAVE_CUDA
if (gpu_array_ptr != nullptr) {
device_free_psi_phi_array(this);
gpu_array_ptr = nullptr;
}
#endif

// Reset the meta data except the encoding information.
meta_data.num_times = 0;
meta_data.width = 0;
meta_data.height = 0;
meta_data.pixels_per_image = 0;
meta_data.num_entries = 0;
meta_data.total_array_size = 0;

meta_data.psi_min_val = FLT_MAX;
meta_data.psi_max_val = -FLT_MAX;
meta_data.psi_scale = 1.0;

meta_data.phi_min_val = FLT_MAX;
meta_data.phi_max_val = -FLT_MAX;
meta_data.phi_scale = 1.0;
}

void PsiPhiArray::set_meta_data(int new_num_bytes, int new_num_times, int new_height, int new_width) {
// Validity checking of parameters.
if (new_num_bytes != -1 && new_num_bytes != 1 && new_num_bytes != 2 && new_num_bytes != 4) {
throw std::runtime_error("Invalid setting of num_bytes. Must be (-1 [use default], 1, 2, or 4).");
}
if (new_num_times <= 0) throw std::runtime_error("Invalid num_times passed to set_meta_data.\n");
if (new_width <= 0) throw std::runtime_error("Invalid width passed to set_meta_data.\n");
if (new_height <= 0) throw std::runtime_error("Invalid height passed to set_meta_data.\n");

// Check that we do not have an array allocated.
if (cpu_array_ptr != nullptr) {
throw std::runtime_error("Cannot change meta data with allocated arrays. Call clear() first.");
}

meta_data.num_bytes = new_num_bytes;
if (meta_data.num_bytes == 1) {
meta_data.block_size = sizeof(uint8_t);
} else if (meta_data.num_bytes == 2) {
meta_data.block_size = sizeof(uint16_t);
} else {
meta_data.num_bytes = 4;
meta_data.block_size = sizeof(float);
}

meta_data.num_times = new_num_times;
meta_data.width = new_width;
meta_data.height = new_height;
meta_data.pixels_per_image = meta_data.width * meta_data.height;
meta_data.num_entries = 2 * meta_data.pixels_per_image * meta_data.num_times;
meta_data.total_array_size = meta_data.block_size * meta_data.num_entries;
}

void PsiPhiArray::set_psi_scaling(float min_val, float max_val, float scale_val) {
if (min_val > max_val) throw std::runtime_error("Min value needs to be < max value");
if (scale_val <= 0) throw std::runtime_error("Scale value must be greater than zero.");
meta_data.psi_min_val = min_val;
meta_data.psi_max_val = max_val;
meta_data.psi_scale = scale_val;
}

void PsiPhiArray::set_phi_scaling(float min_val, float max_val, float scale_val) {
if (min_val > max_val) throw std::runtime_error("Min value needs to be < max value");
if (scale_val <= 0) throw std::runtime_error("Scale value must be greater than zero.");
meta_data.phi_min_val = min_val;
meta_data.phi_max_val = max_val;
meta_data.phi_scale = scale_val;
}

PsiPhi PsiPhiArray::read_psi_phi(int time, int row, int col) {
PsiPhi result = {NO_DATA, NO_DATA};

// Array allocation and bounds checking.
if ((cpu_array_ptr == nullptr) || (row < 0) || (col < 0) || (row >= meta_data.height) ||
(col >= meta_data.width)) {
return result;
}

// Compute the in-list index from the row, column, and time.
int start_index = 2 * (meta_data.pixels_per_image * time + row * meta_data.width + col);

if (meta_data.num_bytes == 4) {
// Short circuit the typical case of float encoding.
// No scaling or shifting done.
result.psi = reinterpret_cast<float*>(cpu_array_ptr)[start_index];
result.phi = reinterpret_cast<float*>(cpu_array_ptr)[start_index + 1];
} else {
// Handle the compressed encodings.
float psi_value = (meta_data.num_bytes == 1)
? (float)reinterpret_cast<uint8_t*>(cpu_array_ptr)[start_index]
: (float)reinterpret_cast<uint16_t*>(cpu_array_ptr)[start_index];
result.psi = (psi_value == 0.0) ? NO_DATA
: (psi_value - 1.0) * meta_data.psi_scale + meta_data.psi_min_val;

float phi_value = (meta_data.num_bytes == 1)
? (float)reinterpret_cast<uint8_t*>(cpu_array_ptr)[start_index + 1]
: (float)reinterpret_cast<uint16_t*>(cpu_array_ptr)[start_index + 1];
result.phi = (phi_value == 0.0) ? NO_DATA
: (phi_value - 1.0) * meta_data.phi_scale + meta_data.phi_min_val;
}
return result;
}

// -------------------------------------------
// --- Implementation of utility functions ---
// -------------------------------------------

// Compute the min, max, and scale parameter from the a vector of image data.
std::array<float, 3> compute_scale_params_from_image_vect(const std::vector<RawImage>& imgs, int num_bytes) {
int num_images = imgs.size();

// Do a linear pass through the data to compute the scaling parameters for psi and phi.
float min_val = FLT_MAX;
float max_val = -FLT_MAX;
for (int i = 0; i < num_images; ++i) {
std::array<float, 2> bnds = imgs[i].compute_bounds();
if (bnds[0] < min_val) min_val = bnds[0];
if (bnds[1] > max_val) max_val = bnds[1];
}

// Set the scale if we are encoding the values.
float scale = 1.0;
if (num_bytes == 1 || num_bytes == 2) {
float width = (max_val - min_val);
if (width < 1e-6) width = 1e-6; // Avoid a zero width.

long int num_values = (1 << (8 * num_bytes)) - 1;
scale = width / (double)num_values;
}

return {min_val, max_val, scale};
}

template <typename T>
void set_encode_cpu_psi_phi_array(PsiPhiArray& data, const std::vector<RawImage>& psi_imgs,
const std::vector<RawImage>& phi_imgs) {
if (data.get_cpu_array_ptr() != nullptr) {
throw std::runtime_error("CPU PsiPhi already allocated.");
}
T* encoded = (T*)malloc(data.get_total_array_size());

// Create a safe maximum that is slightly less than the true max to avoid
// rollover of the unsigned integer.
float safe_max_psi = data.get_psi_max_val() - data.get_psi_scale() / 100.0;
float safe_max_phi = data.get_phi_max_val() - data.get_phi_scale() / 100.0;

int current_index = 0;
int num_bytes = data.get_num_bytes();
for (int t = 0; t < data.get_num_times(); ++t) {
for (int row = 0; row < data.get_height(); ++row) {
for (int col = 0; col < data.get_width(); ++col) {
float psi_value = psi_imgs[t].get_pixel({row, col});
float phi_value = phi_imgs[t].get_pixel({row, col});

// Handle the encoding for the different values.
if (num_bytes == 1 || num_bytes == 2) {
psi_value = encode_uint_scalar(psi_value, data.get_psi_min_val(), safe_max_psi,
data.get_psi_scale());
phi_value = encode_uint_scalar(phi_value, data.get_phi_min_val(), safe_max_phi,
data.get_phi_scale());
}

encoded[current_index++] = static_cast<T>(psi_value);
encoded[current_index++] = static_cast<T>(phi_value);
}
}
}

data.set_cpu_array_ptr((void*)encoded);
}

void set_float_cpu_psi_phi_array(PsiPhiArray& data, const std::vector<RawImage>& psi_imgs,
const std::vector<RawImage>& phi_imgs) {
if (data.get_cpu_array_ptr() != nullptr) {
throw std::runtime_error("CPU PsiPhi already allocated.");
}
float* encoded = (float*)malloc(data.get_total_array_size());

int current_index = 0;
for (int t = 0; t < data.get_num_times(); ++t) {
for (int row = 0; row < data.get_height(); ++row) {
for (int col = 0; col < data.get_width(); ++col) {
encoded[current_index++] = psi_imgs[t].get_pixel({row, col});
encoded[current_index++] = phi_imgs[t].get_pixel({row, col});
}
}
}

data.set_cpu_array_ptr((void*)encoded);
}

void fill_psi_phi_array(PsiPhiArray& result_data, int num_bytes, const std::vector<RawImage>& psi_imgs,
const std::vector<RawImage>& phi_imgs) {
if (result_data.get_cpu_array_ptr() != nullptr) {
return;
}

// Set the meta data and do a bunch of validity checks.
int num_times = psi_imgs.size();
if (num_times <= 0) throw std::runtime_error("Trying to fill PsiPhi from empty vectors.");
if (num_times != phi_imgs.size()) throw std::runtime_error("Size mismatch between psi and phi.");

int width = phi_imgs[0].get_width();
int height = phi_imgs[0].get_height();
result_data.set_meta_data(num_bytes, num_times, height, width);

if (result_data.get_num_bytes() == 1 || result_data.get_num_bytes() == 2) {
// Compute the scaling parameters needed for encoding.
std::array<float, 3> psi_params =
compute_scale_params_from_image_vect(psi_imgs, result_data.get_num_bytes());
result_data.set_psi_scaling(psi_params[0], psi_params[1], psi_params[2]);

std::array<float, 3> phi_params =
compute_scale_params_from_image_vect(phi_imgs, result_data.get_num_bytes());
result_data.set_phi_scaling(phi_params[0], phi_params[1], phi_params[2]);

// Do the local encoding.
if (result_data.get_num_bytes() == 1) {
set_encode_cpu_psi_phi_array<uint8_t>(result_data, psi_imgs, phi_imgs);
} else {
set_encode_cpu_psi_phi_array<uint16_t>(result_data, psi_imgs, phi_imgs);
}
} else {
// Just interleave psi and phi images.
set_float_cpu_psi_phi_array(result_data, psi_imgs, phi_imgs);
}

#ifdef HAVE_CUDA
// Create a copy of the encoded data in GPU memory.
device_allocate_psi_phi_array(&result_data);
#endif
}

// -------------------------------------------
// --- Python definitions --------------------
// -------------------------------------------

#ifdef Py_PYTHON_H
static void psi_phi_array_binding(py::module& m) {
using ppa = search::PsiPhiArray;

py::class_<search::PsiPhi>(m, "PsiPhi", pydocs::DOC_PsiPhi)
.def(py::init<>())
.def_readwrite("psi", &search::PsiPhi::psi)
.def_readwrite("phi", &search::PsiPhi::phi);

py::class_<ppa>(m, "PsiPhiArray", pydocs::DOC_PsiPhiArray)
.def(py::init<>())
.def_property_readonly("num_bytes", &ppa::get_num_bytes, pydocs::DOC_PsiPhiArray_get_num_bytes)
.def_property_readonly("num_times", &ppa::get_num_times, pydocs::DOC_PsiPhiArray_get_num_times)
.def_property_readonly("width", &ppa::get_width, pydocs::DOC_PsiPhiArray_get_width)
.def_property_readonly("height", &ppa::get_height, pydocs::DOC_PsiPhiArray_get_height)
.def_property_readonly("pixels_per_image", &ppa::get_pixels_per_image,
pydocs::DOC_PsiPhiArray_get_pixels_per_image)
.def_property_readonly("num_entries", &ppa::get_num_entries,
pydocs::DOC_PsiPhiArray_get_num_entries)
.def_property_readonly("total_array_size", &ppa::get_total_array_size,
pydocs::DOC_PsiPhiArray_get_total_array_size)
.def_property_readonly("block_size", &ppa::get_block_size, pydocs::DOC_PsiPhiArray_get_block_size)
.def_property_readonly("psi_min_val", &ppa::get_psi_min_val,
pydocs::DOC_PsiPhiArray_get_psi_min_val)
.def_property_readonly("psi_max_val", &ppa::get_psi_max_val,
pydocs::DOC_PsiPhiArray_get_psi_max_val)
.def_property_readonly("psi_scale", &ppa::get_psi_scale, pydocs::DOC_PsiPhiArray_get_psi_scale)
.def_property_readonly("phi_min_val", &ppa::get_phi_min_val,
pydocs::DOC_PsiPhiArray_get_phi_min_val)
.def_property_readonly("phi_max_val", &ppa::get_phi_max_val,
pydocs::DOC_PsiPhiArray_get_phi_max_val)
.def_property_readonly("phi_scale", &ppa::get_phi_scale, pydocs::DOC_PsiPhiArray_get_phi_scale)
.def_property_readonly("cpu_array_allocated", &ppa::cpu_array_allocated,
pydocs::DOC_PsiPhiArray_get_cpu_array_allocated)
.def_property_readonly("gpu_array_allocated", &ppa::gpu_array_allocated,
pydocs::DOC_PsiPhiArray_get_gpu_array_allocated)
.def("set_meta_data", &ppa::set_meta_data, pydocs::DOC_PsiPhiArray_set_meta_data)
.def("clear", &ppa::clear, pydocs::DOC_PsiPhiArray_clear)
.def("read_psi_phi", &ppa::read_psi_phi, pydocs::DOC_PsiPhiArray_read_psi_phi);
m.def("compute_scale_params_from_image_vect", &search::compute_scale_params_from_image_vect);
m.def("decode_uint_scalar", &search::decode_uint_scalar);
m.def("encode_uint_scalar", &search::encode_uint_scalar);
m.def("fill_psi_phi_array", &search::fill_psi_phi_array, pydocs::DOC_PsiPhiArray_fill_psi_phi_array);
}
#endif

} /* namespace search */
Loading

0 comments on commit 2c7c37b

Please sign in to comment.