Skip to content

Commit

Permalink
Split up common.h
Browse files Browse the repository at this point in the history
  • Loading branch information
jeremykubica committed Sep 19, 2023
1 parent 07db050 commit 16f58e4
Show file tree
Hide file tree
Showing 7 changed files with 303 additions and 287 deletions.
46 changes: 0 additions & 46 deletions src/kbmod/image_base/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,6 @@ using std::to_string;
PYBIND11_MODULE(search, m) {
m.attr("KB_NO_DATA") = pybind11::float_(NO_DATA);
m.attr("HAS_GPU") = pybind11::bool_(HAVE_GPU);
py::enum_<StampType>(m, "StampType")
.value("STAMP_SUM", StampType::STAMP_SUM)
.value("STAMP_MEAN", StampType::STAMP_MEAN)
.value("STAMP_MEDIAN", StampType::STAMP_MEDIAN)
.export_values();
py::class_<pf>(m, "psf", py::buffer_protocol(), R"pbdoc(
Point Spread Function.
Expand Down Expand Up @@ -176,34 +171,6 @@ PYBIND11_MODULE(search, m) {
.def("get_width", &is::getWidth)
.def("get_height", &is::getHeight)
.def("get_npixels", &is::getNPixels);
py::class_<trajectory>(m, "trajectory", R"pbdoc(
A trajectory structure holding basic information about potential results.
)pbdoc")
.def(py::init<>())
.def_readwrite("x_v", &trajectory::xVel)
.def_readwrite("y_v", &trajectory::yVel)
.def_readwrite("lh", &trajectory::lh)
.def_readwrite("flux", &trajectory::flux)
.def_readwrite("x", &trajectory::x)
.def_readwrite("y", &trajectory::y)
.def_readwrite("obs_count", &trajectory::obsCount)
.def("__repr__",
[](const trajectory &t) {
return "lh: " + to_string(t.lh) + " flux: " + to_string(t.flux) +
" x: " + to_string(t.x) + " y: " + to_string(t.y) + " x_v: " + to_string(t.xVel) +
" y_v: " + to_string(t.yVel) + " obs_count: " + to_string(t.obsCount);
})
.def(py::pickle(
[](const trajectory &p) { // __getstate__
return py::make_tuple(p.xVel, p.yVel, p.lh, p.flux, p.x, p.y, p.obsCount);
},
[](py::tuple t) { // __setstate__
if (t.size() != 7) throw std::runtime_error("Invalid state!");
trajectory trj = {t[0].cast<float>(), t[1].cast<float>(), t[2].cast<float>(),
t[3].cast<float>(), t[4].cast<short>(), t[5].cast<short>(),
t[6].cast<short>()};
return trj;
}));
py::class_<pixelPos>(m, "pixel_pos")
.def(py::init<>())
.def_readwrite("x", &pixelPos::x)
Expand All @@ -217,17 +184,4 @@ PYBIND11_MODULE(search, m) {
.def_readwrite("m11", &imageMoments::m11)
.def_readwrite("m02", &imageMoments::m02)
.def_readwrite("m20", &imageMoments::m20);
py::class_<stampParameters>(m, "stamp_parameters")
.def(py::init<>())
.def_readwrite("radius", &stampParameters::radius)
.def_readwrite("stamp_type", &stampParameters::stamp_type)
.def_readwrite("do_filtering", &stampParameters::do_filtering)
.def_readwrite("center_thresh", &stampParameters::center_thresh)
.def_readwrite("peak_offset_x", &stampParameters::peak_offset_x)
.def_readwrite("peak_offset_y", &stampParameters::peak_offset_y)
.def_readwrite("m01", &stampParameters::m01_limit)
.def_readwrite("m10", &stampParameters::m10_limit)
.def_readwrite("m11", &stampParameters::m11_limit)
.def_readwrite("m02", &stampParameters::m02_limit)
.def_readwrite("m20", &stampParameters::m20_limit);
}
36 changes: 36 additions & 0 deletions src/kbmod/image_base/common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* common.h
*
* Created on: Jun 20, 2017
* Author: kbmod-usr
*/

#ifndef COMMON_H_
#define COMMON_H_

#ifdef HAVE_CUDA
constexpr bool HAVE_GPU = true;
#else
constexpr bool HAVE_GPU = false;
#endif

constexpr float NO_DATA = -9999.0;
constexpr unsigned int MAX_KERNEL_RADIUS = 15;

// The position (in pixels) on an image.
struct pixelPos {
float x;
float y;
};

// Basic image moments use for analysis.
struct imageMoments {
float m00;
float m01;
float m10;
float m11;
float m02;
float m20;
};

#endif /* COMMON_H_ */
213 changes: 1 addition & 212 deletions src/kbmod/image_base/image_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,14 @@
#ifndef IMAGE_KERNELS_CU_
#define IMAGE_KERNELS_CU_

#define MAX_STAMP_IMAGES 200
constexpr unsigned short CONV_THREAD_DIM = 32;

#include <assert.h>
#include "common.h"
#include "cuda_errors.h"
#include <stdio.h>
#include <float.h>

#include "ImageStack.h"

namespace image_base {

/*
Expand Down Expand Up @@ -88,215 +86,6 @@ extern "C" void deviceConvolve(float *sourceImg, float *resultImg, int width, in
checkCudaErrors(cudaFree(deviceResultImg));
}

__global__ void device_get_coadd_stamp(int num_images, int width, int height, float *image_vect,
perImageData image_data, int num_trajectories,
trajectory *trajectories, stampParameters params, int *use_index_vect,
float *results) {
// Get the trajectory that we are going to be using.
const int trj_index = blockIdx.x * blockDim.x + threadIdx.x;
if (trj_index < 0 || trj_index >= num_trajectories) return;
trajectory trj = trajectories[trj_index];

// Get the pixel coordinates within the stamp to use.
const int stamp_width = 2 * params.radius + 1;
const int stamp_x = threadIdx.y;
if (stamp_x < 0 || stamp_x >= stamp_width) return;

const int stamp_y = threadIdx.z;
if (stamp_y < 0 || stamp_y >= stamp_width) return;

// Compute the various offsets for the indices.
int use_index_offset = num_images * trj_index;
int trj_offset = trj_index * stamp_width * stamp_width;
int pixel_index = stamp_width * stamp_y + stamp_x;

// Allocate space for the coadd information.
assert(num_images < MAX_STAMP_IMAGES);
float values[MAX_STAMP_IMAGES];

// Loop over each image and compute the stamp.
int num_values = 0;
for (int t = 0; t < num_images; ++t) {
// Skip entries marked 0 in the use_index_vect.
if (use_index_vect != nullptr && use_index_vect[use_index_offset + t] == 0) {
continue;
}

// Predict the trajectory's position including the barycentric correction if needed.
float cTime = image_data.imageTimes[t];
int currentX = int(trj.x + trj.xVel * cTime);
int currentY = int(trj.y + trj.yVel * cTime);
if (image_data.baryCorrs != nullptr) {
baryCorrection bc = image_data.baryCorrs[t];
currentX = int(trj.x + trj.xVel * cTime + bc.dx + trj.x * bc.dxdx + trj.y * bc.dxdy);
currentY = int(trj.y + trj.yVel * cTime + bc.dy + trj.x * bc.dydx + trj.y * bc.dydy);
}

// Get the stamp and add it to the list of values.
int img_x = currentX - params.radius + stamp_x;
int img_y = currentY - params.radius + stamp_y;
if ((img_x >= 0) && (img_x < width) && (img_y >= 0) && (img_y < height)) {
int pixel_index = width * height * t + img_y * width + img_x;
if (image_vect[pixel_index] != NO_DATA) {
values[num_values] = image_vect[pixel_index];
++num_values;
}
}
}

// If there are no values, just return 0.
if (num_values == 0) {
results[trj_offset + pixel_index] = 0.0;
return;
}

// Do the actual computation from the values.
float result = 0.0;
int median_ind = num_values / 2; // Outside switch to avoid compiler warnings.
switch (params.stamp_type) {
case STAMP_MEDIAN:
// Sort the values in ascending order.
for (int j = 0; j < num_values; j++) {
for (int k = j + 1; k < num_values; k++) {
if (values[j] > values[k]) {
float tmp = values[j];
values[j] = values[k];
values[k] = tmp;
}
}
}

// Take the median value of the pixels with data.
if (num_values % 2 == 0) {
result = (values[median_ind] + values[median_ind - 1]) / 2.0;
} else {
result = values[median_ind];
}
break;
case STAMP_SUM:
for (int t = 0; t < num_values; ++t) {
result += values[t];
}
break;
case STAMP_MEAN:
for (int t = 0; t < num_values; ++t) {
result += values[t];
}
result /= float(num_values);
break;
}

// Save the result to the correct pixel location.
results[trj_offset + pixel_index] = result;
}

void deviceGetCoadds(ImageStack &stack, perImageData image_data, int num_trajectories,
trajectory *trajectories, stampParameters params,
std::vector<std::vector<bool>> &use_index_vect, float *results) {
// Allocate Device memory
trajectory *device_trjs;
int *device_use_index = nullptr;
float *device_times;
float *device_img;
float *device_res;
baryCorrection *deviceBaryCorrs = nullptr;

// Compute the dimensions for the data.
const unsigned int num_images = stack.imgCount();
const unsigned int width = stack.getWidth();
const unsigned int height = stack.getHeight();
const unsigned int num_image_pixels = num_images * width * height;
const unsigned int stamp_width = 2 * params.radius + 1;
const unsigned int stamp_ppi = (2 * params.radius + 1) * (2 * params.radius + 1);
const unsigned int num_stamp_pixels = num_trajectories * stamp_ppi;

// Allocate and copy the trajectories.
checkCudaErrors(cudaMalloc((void **)&device_trjs, sizeof(trajectory) * num_trajectories));
checkCudaErrors(cudaMemcpy(device_trjs, trajectories, sizeof(trajectory) * num_trajectories,
cudaMemcpyHostToDevice));

// Check if we need to create a vector of per-trajectory, per-image use.
// Convert the vector of booleans into an integer array so we do a cudaMemcpy.
if (use_index_vect.size() == num_trajectories) {
checkCudaErrors(cudaMalloc((void **)&device_use_index, sizeof(int) * num_images * num_trajectories));

int *start_ptr = device_use_index;
std::vector<int> int_vect(num_images, 0);
for (unsigned i = 0; i < num_trajectories; ++i) {
assert(use_index_vect[i].size() == num_images);
for (unsigned t = 0; t < num_images; ++t) {
int_vect[t] = use_index_vect[i][t] ? 1 : 0;
}

checkCudaErrors(
cudaMemcpy(start_ptr, int_vect.data(), sizeof(int) * num_images, cudaMemcpyHostToDevice));
start_ptr += num_images;
}
}

// Allocate and copy the times.
checkCudaErrors(cudaMalloc((void **)&device_times, sizeof(float) * num_images));
checkCudaErrors(cudaMemcpy(device_times, image_data.imageTimes, sizeof(float) * num_images,
cudaMemcpyHostToDevice));

// Allocate and copy the images.
checkCudaErrors(cudaMalloc((void **)&device_img, sizeof(float) * num_image_pixels));
float *next_ptr = device_img;
for (unsigned t = 0; t < num_images; ++t) {
const std::vector<float> &data_ref = stack.getSingleImage(t).getScience().getPixels();

assert(data_ref.size() == width * height);
checkCudaErrors(cudaMemcpy(next_ptr, data_ref.data(), sizeof(float) * width * height,
cudaMemcpyHostToDevice));
next_ptr += width * height;
}

// Allocate space for the results.
checkCudaErrors(cudaMalloc((void **)&device_res, sizeof(float) * num_stamp_pixels));

// Allocate memory for and copy barycentric corrections (if needed).
if (image_data.baryCorrs != nullptr) {
checkCudaErrors(cudaMalloc((void **)&deviceBaryCorrs, sizeof(baryCorrection) * num_images));
checkCudaErrors(cudaMemcpy(deviceBaryCorrs, image_data.baryCorrs, sizeof(baryCorrection) * num_images,
cudaMemcpyHostToDevice));
}

// Wrap the per-image data into a struct. This struct will be copied by value
// during the function call, so we don't need to allocate memory for the
// struct itself. We just set the pointers to the on device vectors.
perImageData device_image_data;
device_image_data.numImages = num_images;
device_image_data.imageTimes = device_times;
device_image_data.baryCorrs = deviceBaryCorrs;
device_image_data.psiParams = nullptr;
device_image_data.phiParams = nullptr;

dim3 blocks(num_trajectories, 1, 1);
dim3 threads(1, stamp_width, stamp_width);

// Create the stamps.
device_get_coadd_stamp<<<blocks, threads>>>(num_images, width, height, device_img, device_image_data,
num_trajectories, device_trjs, params, device_use_index,
device_res);
cudaDeviceSynchronize();

// Free up the unneeded memory (everything except for the on-device results).
if (deviceBaryCorrs != nullptr) checkCudaErrors(cudaFree(deviceBaryCorrs));
checkCudaErrors(cudaFree(device_img));
if (device_use_index != nullptr) checkCudaErrors(cudaFree(device_use_index));
checkCudaErrors(cudaFree(device_times));
checkCudaErrors(cudaFree(device_trjs));
cudaDeviceSynchronize();

// Read back results
checkCudaErrors(
cudaMemcpy(results, device_res, sizeof(float) * num_stamp_pixels, cudaMemcpyDeviceToHost));
cudaDeviceSynchronize();

// Free the rest of the on GPU memory.
checkCudaErrors(cudaFree(device_res));
}

} /* namespace image_base */

#endif /* IMAGE_KERNELS_CU_ */
1 change: 1 addition & 0 deletions src/kbmod/search/KBMOSearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "common.h"
#include "ImageStack.h"
#include "PointSpreadFunc.h"
#include "search_structs.h"

using image_base::ImageStack;
using image_base::LayeredImage;
Expand Down
46 changes: 46 additions & 0 deletions src/kbmod/search/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,52 @@ using ks = search::KBMOSearch;
using std::to_string;

PYBIND11_MODULE(search, m) {
py::enum_<StampType>(m, "StampType")
.value("STAMP_SUM", StampType::STAMP_SUM)
.value("STAMP_MEAN", StampType::STAMP_MEAN)
.value("STAMP_MEDIAN", StampType::STAMP_MEDIAN)
.export_values();
py::class_<trajectory>(m, "trajectory", R"pbdoc(
A trajectory structure holding basic information about potential results.
)pbdoc")
.def(py::init<>())
.def_readwrite("x_v", &trajectory::xVel)
.def_readwrite("y_v", &trajectory::yVel)
.def_readwrite("lh", &trajectory::lh)
.def_readwrite("flux", &trajectory::flux)
.def_readwrite("x", &trajectory::x)
.def_readwrite("y", &trajectory::y)
.def_readwrite("obs_count", &trajectory::obsCount)
.def("__repr__",
[](const trajectory &t) {
return "lh: " + to_string(t.lh) + " flux: " + to_string(t.flux) +
" x: " + to_string(t.x) + " y: " + to_string(t.y) + " x_v: " + to_string(t.xVel) +
" y_v: " + to_string(t.yVel) + " obs_count: " + to_string(t.obsCount);
})
.def(py::pickle(
[](const trajectory &p) { // __getstate__
return py::make_tuple(p.xVel, p.yVel, p.lh, p.flux, p.x, p.y, p.obsCount);
},
[](py::tuple t) { // __setstate__
if (t.size() != 7) throw std::runtime_error("Invalid state!");
trajectory trj = {t[0].cast<float>(), t[1].cast<float>(), t[2].cast<float>(),
t[3].cast<float>(), t[4].cast<short>(), t[5].cast<short>(),
t[6].cast<short>()};
return trj;
}));
py::class_<stampParameters>(m, "stamp_parameters")
.def(py::init<>())
.def_readwrite("radius", &stampParameters::radius)
.def_readwrite("stamp_type", &stampParameters::stamp_type)
.def_readwrite("do_filtering", &stampParameters::do_filtering)
.def_readwrite("center_thresh", &stampParameters::center_thresh)
.def_readwrite("peak_offset_x", &stampParameters::peak_offset_x)
.def_readwrite("peak_offset_y", &stampParameters::peak_offset_y)
.def_readwrite("m01", &stampParameters::m01_limit)
.def_readwrite("m10", &stampParameters::m10_limit)
.def_readwrite("m11", &stampParameters::m11_limit)
.def_readwrite("m02", &stampParameters::m02_limit)
.def_readwrite("m20", &stampParameters::m20_limit);
py::class_<ks>(m, "stack_search")
.def(py::init<is &>())
.def("save_psi_phi", &ks::savePsiPhi)
Expand Down
Loading

0 comments on commit 16f58e4

Please sign in to comment.