From 16f58e4690edaad978208128472cdd9f78715ade Mon Sep 17 00:00:00 2001 From: Jeremy Kubica <104161096+jeremykubica@users.noreply.github.com> Date: Tue, 19 Sep 2023 11:31:27 -0400 Subject: [PATCH] Split up common.h --- src/kbmod/image_base/bindings.cpp | 46 ---- src/kbmod/image_base/common.h | 36 +++ src/kbmod/image_base/image_kernels.cu | 213 +---------------- src/kbmod/search/KBMOSearch.h | 1 + src/kbmod/search/bindings.cpp | 46 ++++ src/kbmod/search/kernels.cu | 217 +++++++++++++++++- .../common.h => search/search_structs.h} | 31 +-- 7 files changed, 303 insertions(+), 287 deletions(-) create mode 100644 src/kbmod/image_base/common.h rename src/kbmod/{include/common.h => search/search_structs.h} (82%) diff --git a/src/kbmod/image_base/bindings.cpp b/src/kbmod/image_base/bindings.cpp index d30c06ef..c0e68723 100644 --- a/src/kbmod/image_base/bindings.cpp +++ b/src/kbmod/image_base/bindings.cpp @@ -20,11 +20,6 @@ using std::to_string; PYBIND11_MODULE(search, m) { m.attr("KB_NO_DATA") = pybind11::float_(NO_DATA); m.attr("HAS_GPU") = pybind11::bool_(HAVE_GPU); - py::enum_(m, "StampType") - .value("STAMP_SUM", StampType::STAMP_SUM) - .value("STAMP_MEAN", StampType::STAMP_MEAN) - .value("STAMP_MEDIAN", StampType::STAMP_MEDIAN) - .export_values(); py::class_(m, "psf", py::buffer_protocol(), R"pbdoc( Point Spread Function. @@ -176,34 +171,6 @@ PYBIND11_MODULE(search, m) { .def("get_width", &is::getWidth) .def("get_height", &is::getHeight) .def("get_npixels", &is::getNPixels); - py::class_(m, "trajectory", R"pbdoc( - A trajectory structure holding basic information about potential results. - )pbdoc") - .def(py::init<>()) - .def_readwrite("x_v", &trajectory::xVel) - .def_readwrite("y_v", &trajectory::yVel) - .def_readwrite("lh", &trajectory::lh) - .def_readwrite("flux", &trajectory::flux) - .def_readwrite("x", &trajectory::x) - .def_readwrite("y", &trajectory::y) - .def_readwrite("obs_count", &trajectory::obsCount) - .def("__repr__", - [](const trajectory &t) { - return "lh: " + to_string(t.lh) + " flux: " + to_string(t.flux) + - " x: " + to_string(t.x) + " y: " + to_string(t.y) + " x_v: " + to_string(t.xVel) + - " y_v: " + to_string(t.yVel) + " obs_count: " + to_string(t.obsCount); - }) - .def(py::pickle( - [](const trajectory &p) { // __getstate__ - return py::make_tuple(p.xVel, p.yVel, p.lh, p.flux, p.x, p.y, p.obsCount); - }, - [](py::tuple t) { // __setstate__ - if (t.size() != 7) throw std::runtime_error("Invalid state!"); - trajectory trj = {t[0].cast(), t[1].cast(), t[2].cast(), - t[3].cast(), t[4].cast(), t[5].cast(), - t[6].cast()}; - return trj; - })); py::class_(m, "pixel_pos") .def(py::init<>()) .def_readwrite("x", &pixelPos::x) @@ -217,17 +184,4 @@ PYBIND11_MODULE(search, m) { .def_readwrite("m11", &imageMoments::m11) .def_readwrite("m02", &imageMoments::m02) .def_readwrite("m20", &imageMoments::m20); - py::class_(m, "stamp_parameters") - .def(py::init<>()) - .def_readwrite("radius", &stampParameters::radius) - .def_readwrite("stamp_type", &stampParameters::stamp_type) - .def_readwrite("do_filtering", &stampParameters::do_filtering) - .def_readwrite("center_thresh", &stampParameters::center_thresh) - .def_readwrite("peak_offset_x", &stampParameters::peak_offset_x) - .def_readwrite("peak_offset_y", &stampParameters::peak_offset_y) - .def_readwrite("m01", &stampParameters::m01_limit) - .def_readwrite("m10", &stampParameters::m10_limit) - .def_readwrite("m11", &stampParameters::m11_limit) - .def_readwrite("m02", &stampParameters::m02_limit) - .def_readwrite("m20", &stampParameters::m20_limit); } diff --git a/src/kbmod/image_base/common.h b/src/kbmod/image_base/common.h new file mode 100644 index 00000000..0d97729f --- /dev/null +++ b/src/kbmod/image_base/common.h @@ -0,0 +1,36 @@ +/* + * common.h + * + * Created on: Jun 20, 2017 + * Author: kbmod-usr + */ + +#ifndef COMMON_H_ +#define COMMON_H_ + +#ifdef HAVE_CUDA + constexpr bool HAVE_GPU = true; +#else + constexpr bool HAVE_GPU = false; +#endif + +constexpr float NO_DATA = -9999.0; +constexpr unsigned int MAX_KERNEL_RADIUS = 15; + +// The position (in pixels) on an image. +struct pixelPos { + float x; + float y; +}; + +// Basic image moments use for analysis. +struct imageMoments { + float m00; + float m01; + float m10; + float m11; + float m02; + float m20; +}; + +#endif /* COMMON_H_ */ diff --git a/src/kbmod/image_base/image_kernels.cu b/src/kbmod/image_base/image_kernels.cu index 4bac0417..b3845091 100644 --- a/src/kbmod/image_base/image_kernels.cu +++ b/src/kbmod/image_base/image_kernels.cu @@ -8,7 +8,7 @@ #ifndef IMAGE_KERNELS_CU_ #define IMAGE_KERNELS_CU_ -#define MAX_STAMP_IMAGES 200 +constexpr unsigned short CONV_THREAD_DIM = 32; #include #include "common.h" @@ -16,8 +16,6 @@ #include #include -#include "ImageStack.h" - namespace image_base { /* @@ -88,215 +86,6 @@ extern "C" void deviceConvolve(float *sourceImg, float *resultImg, int width, in checkCudaErrors(cudaFree(deviceResultImg)); } -__global__ void device_get_coadd_stamp(int num_images, int width, int height, float *image_vect, - perImageData image_data, int num_trajectories, - trajectory *trajectories, stampParameters params, int *use_index_vect, - float *results) { - // Get the trajectory that we are going to be using. - const int trj_index = blockIdx.x * blockDim.x + threadIdx.x; - if (trj_index < 0 || trj_index >= num_trajectories) return; - trajectory trj = trajectories[trj_index]; - - // Get the pixel coordinates within the stamp to use. - const int stamp_width = 2 * params.radius + 1; - const int stamp_x = threadIdx.y; - if (stamp_x < 0 || stamp_x >= stamp_width) return; - - const int stamp_y = threadIdx.z; - if (stamp_y < 0 || stamp_y >= stamp_width) return; - - // Compute the various offsets for the indices. - int use_index_offset = num_images * trj_index; - int trj_offset = trj_index * stamp_width * stamp_width; - int pixel_index = stamp_width * stamp_y + stamp_x; - - // Allocate space for the coadd information. - assert(num_images < MAX_STAMP_IMAGES); - float values[MAX_STAMP_IMAGES]; - - // Loop over each image and compute the stamp. - int num_values = 0; - for (int t = 0; t < num_images; ++t) { - // Skip entries marked 0 in the use_index_vect. - if (use_index_vect != nullptr && use_index_vect[use_index_offset + t] == 0) { - continue; - } - - // Predict the trajectory's position including the barycentric correction if needed. - float cTime = image_data.imageTimes[t]; - int currentX = int(trj.x + trj.xVel * cTime); - int currentY = int(trj.y + trj.yVel * cTime); - if (image_data.baryCorrs != nullptr) { - baryCorrection bc = image_data.baryCorrs[t]; - currentX = int(trj.x + trj.xVel * cTime + bc.dx + trj.x * bc.dxdx + trj.y * bc.dxdy); - currentY = int(trj.y + trj.yVel * cTime + bc.dy + trj.x * bc.dydx + trj.y * bc.dydy); - } - - // Get the stamp and add it to the list of values. - int img_x = currentX - params.radius + stamp_x; - int img_y = currentY - params.radius + stamp_y; - if ((img_x >= 0) && (img_x < width) && (img_y >= 0) && (img_y < height)) { - int pixel_index = width * height * t + img_y * width + img_x; - if (image_vect[pixel_index] != NO_DATA) { - values[num_values] = image_vect[pixel_index]; - ++num_values; - } - } - } - - // If there are no values, just return 0. - if (num_values == 0) { - results[trj_offset + pixel_index] = 0.0; - return; - } - - // Do the actual computation from the values. - float result = 0.0; - int median_ind = num_values / 2; // Outside switch to avoid compiler warnings. - switch (params.stamp_type) { - case STAMP_MEDIAN: - // Sort the values in ascending order. - for (int j = 0; j < num_values; j++) { - for (int k = j + 1; k < num_values; k++) { - if (values[j] > values[k]) { - float tmp = values[j]; - values[j] = values[k]; - values[k] = tmp; - } - } - } - - // Take the median value of the pixels with data. - if (num_values % 2 == 0) { - result = (values[median_ind] + values[median_ind - 1]) / 2.0; - } else { - result = values[median_ind]; - } - break; - case STAMP_SUM: - for (int t = 0; t < num_values; ++t) { - result += values[t]; - } - break; - case STAMP_MEAN: - for (int t = 0; t < num_values; ++t) { - result += values[t]; - } - result /= float(num_values); - break; - } - - // Save the result to the correct pixel location. - results[trj_offset + pixel_index] = result; -} - -void deviceGetCoadds(ImageStack &stack, perImageData image_data, int num_trajectories, - trajectory *trajectories, stampParameters params, - std::vector> &use_index_vect, float *results) { - // Allocate Device memory - trajectory *device_trjs; - int *device_use_index = nullptr; - float *device_times; - float *device_img; - float *device_res; - baryCorrection *deviceBaryCorrs = nullptr; - - // Compute the dimensions for the data. - const unsigned int num_images = stack.imgCount(); - const unsigned int width = stack.getWidth(); - const unsigned int height = stack.getHeight(); - const unsigned int num_image_pixels = num_images * width * height; - const unsigned int stamp_width = 2 * params.radius + 1; - const unsigned int stamp_ppi = (2 * params.radius + 1) * (2 * params.radius + 1); - const unsigned int num_stamp_pixels = num_trajectories * stamp_ppi; - - // Allocate and copy the trajectories. - checkCudaErrors(cudaMalloc((void **)&device_trjs, sizeof(trajectory) * num_trajectories)); - checkCudaErrors(cudaMemcpy(device_trjs, trajectories, sizeof(trajectory) * num_trajectories, - cudaMemcpyHostToDevice)); - - // Check if we need to create a vector of per-trajectory, per-image use. - // Convert the vector of booleans into an integer array so we do a cudaMemcpy. - if (use_index_vect.size() == num_trajectories) { - checkCudaErrors(cudaMalloc((void **)&device_use_index, sizeof(int) * num_images * num_trajectories)); - - int *start_ptr = device_use_index; - std::vector int_vect(num_images, 0); - for (unsigned i = 0; i < num_trajectories; ++i) { - assert(use_index_vect[i].size() == num_images); - for (unsigned t = 0; t < num_images; ++t) { - int_vect[t] = use_index_vect[i][t] ? 1 : 0; - } - - checkCudaErrors( - cudaMemcpy(start_ptr, int_vect.data(), sizeof(int) * num_images, cudaMemcpyHostToDevice)); - start_ptr += num_images; - } - } - - // Allocate and copy the times. - checkCudaErrors(cudaMalloc((void **)&device_times, sizeof(float) * num_images)); - checkCudaErrors(cudaMemcpy(device_times, image_data.imageTimes, sizeof(float) * num_images, - cudaMemcpyHostToDevice)); - - // Allocate and copy the images. - checkCudaErrors(cudaMalloc((void **)&device_img, sizeof(float) * num_image_pixels)); - float *next_ptr = device_img; - for (unsigned t = 0; t < num_images; ++t) { - const std::vector &data_ref = stack.getSingleImage(t).getScience().getPixels(); - - assert(data_ref.size() == width * height); - checkCudaErrors(cudaMemcpy(next_ptr, data_ref.data(), sizeof(float) * width * height, - cudaMemcpyHostToDevice)); - next_ptr += width * height; - } - - // Allocate space for the results. - checkCudaErrors(cudaMalloc((void **)&device_res, sizeof(float) * num_stamp_pixels)); - - // Allocate memory for and copy barycentric corrections (if needed). - if (image_data.baryCorrs != nullptr) { - checkCudaErrors(cudaMalloc((void **)&deviceBaryCorrs, sizeof(baryCorrection) * num_images)); - checkCudaErrors(cudaMemcpy(deviceBaryCorrs, image_data.baryCorrs, sizeof(baryCorrection) * num_images, - cudaMemcpyHostToDevice)); - } - - // Wrap the per-image data into a struct. This struct will be copied by value - // during the function call, so we don't need to allocate memory for the - // struct itself. We just set the pointers to the on device vectors. - perImageData device_image_data; - device_image_data.numImages = num_images; - device_image_data.imageTimes = device_times; - device_image_data.baryCorrs = deviceBaryCorrs; - device_image_data.psiParams = nullptr; - device_image_data.phiParams = nullptr; - - dim3 blocks(num_trajectories, 1, 1); - dim3 threads(1, stamp_width, stamp_width); - - // Create the stamps. - device_get_coadd_stamp<<>>(num_images, width, height, device_img, device_image_data, - num_trajectories, device_trjs, params, device_use_index, - device_res); - cudaDeviceSynchronize(); - - // Free up the unneeded memory (everything except for the on-device results). - if (deviceBaryCorrs != nullptr) checkCudaErrors(cudaFree(deviceBaryCorrs)); - checkCudaErrors(cudaFree(device_img)); - if (device_use_index != nullptr) checkCudaErrors(cudaFree(device_use_index)); - checkCudaErrors(cudaFree(device_times)); - checkCudaErrors(cudaFree(device_trjs)); - cudaDeviceSynchronize(); - - // Read back results - checkCudaErrors( - cudaMemcpy(results, device_res, sizeof(float) * num_stamp_pixels, cudaMemcpyDeviceToHost)); - cudaDeviceSynchronize(); - - // Free the rest of the on GPU memory. - checkCudaErrors(cudaFree(device_res)); -} - } /* namespace image_base */ #endif /* IMAGE_KERNELS_CU_ */ diff --git a/src/kbmod/search/KBMOSearch.h b/src/kbmod/search/KBMOSearch.h index 4e5e0564..ca6a43de 100644 --- a/src/kbmod/search/KBMOSearch.h +++ b/src/kbmod/search/KBMOSearch.h @@ -23,6 +23,7 @@ #include "common.h" #include "ImageStack.h" #include "PointSpreadFunc.h" +#include "search_structs.h" using image_base::ImageStack; using image_base::LayeredImage; diff --git a/src/kbmod/search/bindings.cpp b/src/kbmod/search/bindings.cpp index a3032936..3abc0a3f 100644 --- a/src/kbmod/search/bindings.cpp +++ b/src/kbmod/search/bindings.cpp @@ -15,6 +15,52 @@ using ks = search::KBMOSearch; using std::to_string; PYBIND11_MODULE(search, m) { + py::enum_(m, "StampType") + .value("STAMP_SUM", StampType::STAMP_SUM) + .value("STAMP_MEAN", StampType::STAMP_MEAN) + .value("STAMP_MEDIAN", StampType::STAMP_MEDIAN) + .export_values(); + py::class_(m, "trajectory", R"pbdoc( + A trajectory structure holding basic information about potential results. + )pbdoc") + .def(py::init<>()) + .def_readwrite("x_v", &trajectory::xVel) + .def_readwrite("y_v", &trajectory::yVel) + .def_readwrite("lh", &trajectory::lh) + .def_readwrite("flux", &trajectory::flux) + .def_readwrite("x", &trajectory::x) + .def_readwrite("y", &trajectory::y) + .def_readwrite("obs_count", &trajectory::obsCount) + .def("__repr__", + [](const trajectory &t) { + return "lh: " + to_string(t.lh) + " flux: " + to_string(t.flux) + + " x: " + to_string(t.x) + " y: " + to_string(t.y) + " x_v: " + to_string(t.xVel) + + " y_v: " + to_string(t.yVel) + " obs_count: " + to_string(t.obsCount); + }) + .def(py::pickle( + [](const trajectory &p) { // __getstate__ + return py::make_tuple(p.xVel, p.yVel, p.lh, p.flux, p.x, p.y, p.obsCount); + }, + [](py::tuple t) { // __setstate__ + if (t.size() != 7) throw std::runtime_error("Invalid state!"); + trajectory trj = {t[0].cast(), t[1].cast(), t[2].cast(), + t[3].cast(), t[4].cast(), t[5].cast(), + t[6].cast()}; + return trj; + })); + py::class_(m, "stamp_parameters") + .def(py::init<>()) + .def_readwrite("radius", &stampParameters::radius) + .def_readwrite("stamp_type", &stampParameters::stamp_type) + .def_readwrite("do_filtering", &stampParameters::do_filtering) + .def_readwrite("center_thresh", &stampParameters::center_thresh) + .def_readwrite("peak_offset_x", &stampParameters::peak_offset_x) + .def_readwrite("peak_offset_y", &stampParameters::peak_offset_y) + .def_readwrite("m01", &stampParameters::m01_limit) + .def_readwrite("m10", &stampParameters::m10_limit) + .def_readwrite("m11", &stampParameters::m11_limit) + .def_readwrite("m02", &stampParameters::m02_limit) + .def_readwrite("m20", &stampParameters::m20_limit); py::class_(m, "stack_search") .def(py::init()) .def("save_psi_phi", &ks::savePsiPhi) diff --git a/src/kbmod/search/kernels.cu b/src/kbmod/search/kernels.cu index b304fb39..43b024d9 100644 --- a/src/kbmod/search/kernels.cu +++ b/src/kbmod/search/kernels.cu @@ -9,14 +9,20 @@ #define KERNELS_CU_ #define GPU_LC_FILTER 1 #define MAX_NUM_IMAGES 140 +#define MAX_STAMP_IMAGES 200 -#include "common.h" #include #include "cuda_errors.h" #include #include #include +#include "common.h" +#include "ImageStack.h" +#include "search_structs.h" + +using image_base::ImageStack; + namespace search { extern "C" __device__ __host__ void sigmaGFilteredIndicesCU(float *values, int num_values, float sGL0, @@ -409,6 +415,215 @@ extern "C" void deviceSearchFilter(int imageCount, int width, int height, float checkCudaErrors(cudaFree(deviceTests)); } +__global__ void device_get_coadd_stamp(int num_images, int width, int height, float *image_vect, + perImageData image_data, int num_trajectories, + trajectory *trajectories, stampParameters params, int *use_index_vect, + float *results) { + // Get the trajectory that we are going to be using. + const int trj_index = blockIdx.x * blockDim.x + threadIdx.x; + if (trj_index < 0 || trj_index >= num_trajectories) return; + trajectory trj = trajectories[trj_index]; + + // Get the pixel coordinates within the stamp to use. + const int stamp_width = 2 * params.radius + 1; + const int stamp_x = threadIdx.y; + if (stamp_x < 0 || stamp_x >= stamp_width) return; + + const int stamp_y = threadIdx.z; + if (stamp_y < 0 || stamp_y >= stamp_width) return; + + // Compute the various offsets for the indices. + int use_index_offset = num_images * trj_index; + int trj_offset = trj_index * stamp_width * stamp_width; + int pixel_index = stamp_width * stamp_y + stamp_x; + + // Allocate space for the coadd information. + assert(num_images < MAX_STAMP_IMAGES); + float values[MAX_STAMP_IMAGES]; + + // Loop over each image and compute the stamp. + int num_values = 0; + for (int t = 0; t < num_images; ++t) { + // Skip entries marked 0 in the use_index_vect. + if (use_index_vect != nullptr && use_index_vect[use_index_offset + t] == 0) { + continue; + } + + // Predict the trajectory's position including the barycentric correction if needed. + float cTime = image_data.imageTimes[t]; + int currentX = int(trj.x + trj.xVel * cTime); + int currentY = int(trj.y + trj.yVel * cTime); + if (image_data.baryCorrs != nullptr) { + baryCorrection bc = image_data.baryCorrs[t]; + currentX = int(trj.x + trj.xVel * cTime + bc.dx + trj.x * bc.dxdx + trj.y * bc.dxdy); + currentY = int(trj.y + trj.yVel * cTime + bc.dy + trj.x * bc.dydx + trj.y * bc.dydy); + } + + // Get the stamp and add it to the list of values. + int img_x = currentX - params.radius + stamp_x; + int img_y = currentY - params.radius + stamp_y; + if ((img_x >= 0) && (img_x < width) && (img_y >= 0) && (img_y < height)) { + int pixel_index = width * height * t + img_y * width + img_x; + if (image_vect[pixel_index] != NO_DATA) { + values[num_values] = image_vect[pixel_index]; + ++num_values; + } + } + } + + // If there are no values, just return 0. + if (num_values == 0) { + results[trj_offset + pixel_index] = 0.0; + return; + } + + // Do the actual computation from the values. + float result = 0.0; + int median_ind = num_values / 2; // Outside switch to avoid compiler warnings. + switch (params.stamp_type) { + case STAMP_MEDIAN: + // Sort the values in ascending order. + for (int j = 0; j < num_values; j++) { + for (int k = j + 1; k < num_values; k++) { + if (values[j] > values[k]) { + float tmp = values[j]; + values[j] = values[k]; + values[k] = tmp; + } + } + } + + // Take the median value of the pixels with data. + if (num_values % 2 == 0) { + result = (values[median_ind] + values[median_ind - 1]) / 2.0; + } else { + result = values[median_ind]; + } + break; + case STAMP_SUM: + for (int t = 0; t < num_values; ++t) { + result += values[t]; + } + break; + case STAMP_MEAN: + for (int t = 0; t < num_values; ++t) { + result += values[t]; + } + result /= float(num_values); + break; + } + + // Save the result to the correct pixel location. + results[trj_offset + pixel_index] = result; +} + +void deviceGetCoadds(ImageStack &stack, perImageData image_data, int num_trajectories, + trajectory *trajectories, stampParameters params, + std::vector> &use_index_vect, float *results) { + // Allocate Device memory + trajectory *device_trjs; + int *device_use_index = nullptr; + float *device_times; + float *device_img; + float *device_res; + baryCorrection *deviceBaryCorrs = nullptr; + + // Compute the dimensions for the data. + const unsigned int num_images = stack.imgCount(); + const unsigned int width = stack.getWidth(); + const unsigned int height = stack.getHeight(); + const unsigned int num_image_pixels = num_images * width * height; + const unsigned int stamp_width = 2 * params.radius + 1; + const unsigned int stamp_ppi = (2 * params.radius + 1) * (2 * params.radius + 1); + const unsigned int num_stamp_pixels = num_trajectories * stamp_ppi; + + // Allocate and copy the trajectories. + checkCudaErrors(cudaMalloc((void **)&device_trjs, sizeof(trajectory) * num_trajectories)); + checkCudaErrors(cudaMemcpy(device_trjs, trajectories, sizeof(trajectory) * num_trajectories, + cudaMemcpyHostToDevice)); + + // Check if we need to create a vector of per-trajectory, per-image use. + // Convert the vector of booleans into an integer array so we do a cudaMemcpy. + if (use_index_vect.size() == num_trajectories) { + checkCudaErrors(cudaMalloc((void **)&device_use_index, sizeof(int) * num_images * num_trajectories)); + + int *start_ptr = device_use_index; + std::vector int_vect(num_images, 0); + for (unsigned i = 0; i < num_trajectories; ++i) { + assert(use_index_vect[i].size() == num_images); + for (unsigned t = 0; t < num_images; ++t) { + int_vect[t] = use_index_vect[i][t] ? 1 : 0; + } + + checkCudaErrors( + cudaMemcpy(start_ptr, int_vect.data(), sizeof(int) * num_images, cudaMemcpyHostToDevice)); + start_ptr += num_images; + } + } + + // Allocate and copy the times. + checkCudaErrors(cudaMalloc((void **)&device_times, sizeof(float) * num_images)); + checkCudaErrors(cudaMemcpy(device_times, image_data.imageTimes, sizeof(float) * num_images, + cudaMemcpyHostToDevice)); + + // Allocate and copy the images. + checkCudaErrors(cudaMalloc((void **)&device_img, sizeof(float) * num_image_pixels)); + float *next_ptr = device_img; + for (unsigned t = 0; t < num_images; ++t) { + const std::vector &data_ref = stack.getSingleImage(t).getScience().getPixels(); + + assert(data_ref.size() == width * height); + checkCudaErrors(cudaMemcpy(next_ptr, data_ref.data(), sizeof(float) * width * height, + cudaMemcpyHostToDevice)); + next_ptr += width * height; + } + + // Allocate space for the results. + checkCudaErrors(cudaMalloc((void **)&device_res, sizeof(float) * num_stamp_pixels)); + + // Allocate memory for and copy barycentric corrections (if needed). + if (image_data.baryCorrs != nullptr) { + checkCudaErrors(cudaMalloc((void **)&deviceBaryCorrs, sizeof(baryCorrection) * num_images)); + checkCudaErrors(cudaMemcpy(deviceBaryCorrs, image_data.baryCorrs, sizeof(baryCorrection) * num_images, + cudaMemcpyHostToDevice)); + } + + // Wrap the per-image data into a struct. This struct will be copied by value + // during the function call, so we don't need to allocate memory for the + // struct itself. We just set the pointers to the on device vectors. + perImageData device_image_data; + device_image_data.numImages = num_images; + device_image_data.imageTimes = device_times; + device_image_data.baryCorrs = deviceBaryCorrs; + device_image_data.psiParams = nullptr; + device_image_data.phiParams = nullptr; + + dim3 blocks(num_trajectories, 1, 1); + dim3 threads(1, stamp_width, stamp_width); + + // Create the stamps. + device_get_coadd_stamp<<>>(num_images, width, height, device_img, device_image_data, + num_trajectories, device_trjs, params, device_use_index, + device_res); + cudaDeviceSynchronize(); + + // Free up the unneeded memory (everything except for the on-device results). + if (deviceBaryCorrs != nullptr) checkCudaErrors(cudaFree(deviceBaryCorrs)); + checkCudaErrors(cudaFree(device_img)); + if (device_use_index != nullptr) checkCudaErrors(cudaFree(device_use_index)); + checkCudaErrors(cudaFree(device_times)); + checkCudaErrors(cudaFree(device_trjs)); + cudaDeviceSynchronize(); + + // Read back results + checkCudaErrors( + cudaMemcpy(results, device_res, sizeof(float) * num_stamp_pixels, cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); + + // Free the rest of the on GPU memory. + checkCudaErrors(cudaFree(device_res)); +} + } /* namespace search */ #endif /* KERNELS_CU_ */ diff --git a/src/kbmod/include/common.h b/src/kbmod/search/search_structs.h similarity index 82% rename from src/kbmod/include/common.h rename to src/kbmod/search/search_structs.h index 6e9ece5c..3698a733 100644 --- a/src/kbmod/include/common.h +++ b/src/kbmod/search/search_structs.h @@ -5,22 +5,13 @@ * Author: kbmod-usr */ -#ifndef COMMON_H_ -#define COMMON_H_ +#ifndef SEARCH_STRUCTS_ +#define SEARCH_STRUCTS_ -#ifdef HAVE_CUDA - constexpr bool HAVE_GPU = true; -#else - constexpr bool HAVE_GPU = false; -#endif - -constexpr unsigned int MAX_KERNEL_RADIUS = 15; constexpr unsigned short MAX_STAMP_EDGE = 64; -constexpr unsigned short CONV_THREAD_DIM = 32; constexpr unsigned short THREAD_DIM_X = 128; constexpr unsigned short THREAD_DIM_Y = 2; constexpr unsigned short RESULTS_PER_PIXEL = 8; -constexpr float NO_DATA = -9999.0; enum StampType { STAMP_SUM = 0, STAMP_MEAN, STAMP_MEDIAN }; @@ -43,12 +34,6 @@ struct trajectory { short obsCount; }; -// The position (in pixels) of a trajectory. -struct pixelPos { - float x; - float y; -}; - /* * Linear approximation to the barycentric correction needed to transform a * pixel in the first image to a pixel in a consequent image. One struct needed @@ -130,14 +115,4 @@ struct stampParameters { float m20_limit; }; -// Basic image moments use for analysis. -struct imageMoments { - float m00; - float m01; - float m10; - float m11; - float m02; - float m20; -}; - -#endif /* COMMON_H_ */ +#endif /* SEARCH_STRUCTS_ */