Split up common.h

dirac-institute · Sep 19, 2023 · 16f58e4 · 16f58e4
1 parent 07db050
commit 16f58e4
Show file tree

Hide file tree

Showing 7 changed files with 303 additions and 287 deletions.
diff --git a/src/kbmod/image_base/bindings.cpp b/src/kbmod/image_base/bindings.cpp
@@ -20,11 +20,6 @@ using std::to_string;
 PYBIND11_MODULE(search, m) {
     m.attr("KB_NO_DATA") = pybind11::float_(NO_DATA);
     m.attr("HAS_GPU") = pybind11::bool_(HAVE_GPU);
-    py::enum_<StampType>(m, "StampType")
-            .value("STAMP_SUM", StampType::STAMP_SUM)
-            .value("STAMP_MEAN", StampType::STAMP_MEAN)
-            .value("STAMP_MEDIAN", StampType::STAMP_MEDIAN)
-            .export_values();
     py::class_<pf>(m, "psf", py::buffer_protocol(), R"pbdoc(
             Point Spread Function.
 
@@ -176,34 +171,6 @@ PYBIND11_MODULE(search, m) {
             .def("get_width", &is::getWidth)
             .def("get_height", &is::getHeight)
             .def("get_npixels", &is::getNPixels);
-    py::class_<trajectory>(m, "trajectory", R"pbdoc(
-            A trajectory structure holding basic information about potential results.
-            )pbdoc")
-            .def(py::init<>())
-            .def_readwrite("x_v", &trajectory::xVel)
-            .def_readwrite("y_v", &trajectory::yVel)
-            .def_readwrite("lh", &trajectory::lh)
-            .def_readwrite("flux", &trajectory::flux)
-            .def_readwrite("x", &trajectory::x)
-            .def_readwrite("y", &trajectory::y)
-            .def_readwrite("obs_count", &trajectory::obsCount)
-            .def("__repr__",
-                 [](const trajectory &t) {
-                     return "lh: " + to_string(t.lh) + " flux: " + to_string(t.flux) +
-                            " x: " + to_string(t.x) + " y: " + to_string(t.y) + " x_v: " + to_string(t.xVel) +
-                            " y_v: " + to_string(t.yVel) + " obs_count: " + to_string(t.obsCount);
-                 })
-            .def(py::pickle(
-                    [](const trajectory &p) {  // __getstate__
-                        return py::make_tuple(p.xVel, p.yVel, p.lh, p.flux, p.x, p.y, p.obsCount);
-                    },
-                    [](py::tuple t) {  // __setstate__
-                        if (t.size() != 7) throw std::runtime_error("Invalid state!");
-                        trajectory trj = {t[0].cast<float>(), t[1].cast<float>(), t[2].cast<float>(),
-                                          t[3].cast<float>(), t[4].cast<short>(), t[5].cast<short>(),
-                                          t[6].cast<short>()};
-                        return trj;
-                    }));
     py::class_<pixelPos>(m, "pixel_pos")
             .def(py::init<>())
             .def_readwrite("x", &pixelPos::x)
@@ -217,17 +184,4 @@ PYBIND11_MODULE(search, m) {
             .def_readwrite("m11", &imageMoments::m11)
             .def_readwrite("m02", &imageMoments::m02)
             .def_readwrite("m20", &imageMoments::m20);
-    py::class_<stampParameters>(m, "stamp_parameters")
-            .def(py::init<>())
-            .def_readwrite("radius", &stampParameters::radius)
-            .def_readwrite("stamp_type", &stampParameters::stamp_type)
-            .def_readwrite("do_filtering", &stampParameters::do_filtering)
-            .def_readwrite("center_thresh", &stampParameters::center_thresh)
-            .def_readwrite("peak_offset_x", &stampParameters::peak_offset_x)
-            .def_readwrite("peak_offset_y", &stampParameters::peak_offset_y)
-            .def_readwrite("m01", &stampParameters::m01_limit)
-            .def_readwrite("m10", &stampParameters::m10_limit)
-            .def_readwrite("m11", &stampParameters::m11_limit)
-            .def_readwrite("m02", &stampParameters::m02_limit)
-            .def_readwrite("m20", &stampParameters::m20_limit);
 }
diff --git a/src/kbmod/image_base/common.h b/src/kbmod/image_base/common.h
@@ -0,0 +1,36 @@
+/*
+ * common.h
+ *
+ *  Created on: Jun 20, 2017
+ *      Author: kbmod-usr
+ */
+
+#ifndef COMMON_H_
+#define COMMON_H_
+
+#ifdef HAVE_CUDA
+    constexpr bool HAVE_GPU = true;
+#else
+    constexpr bool HAVE_GPU = false;
+#endif
+
+constexpr float NO_DATA = -9999.0;
+constexpr unsigned int MAX_KERNEL_RADIUS = 15;
+
+// The position (in pixels) on an image.
+struct pixelPos {
+    float x;
+    float y;
+};
+
+// Basic image moments use for analysis.
+struct imageMoments {
+    float m00;
+    float m01;
+    float m10;
+    float m11;
+    float m02;
+    float m20;
+};
+
+#endif /* COMMON_H_ */
diff --git a/src/kbmod/image_base/image_kernels.cu b/src/kbmod/image_base/image_kernels.cu
@@ -8,16 +8,14 @@
 #ifndef IMAGE_KERNELS_CU_
 #define IMAGE_KERNELS_CU_
 
-#define MAX_STAMP_IMAGES 200
+constexpr unsigned short CONV_THREAD_DIM = 32;
 
 #include <assert.h>
 #include "common.h"
 #include "cuda_errors.h"
 #include <stdio.h>
 #include <float.h>
 
-#include "ImageStack.h"
-
 namespace image_base {
 
 /*
@@ -88,215 +86,6 @@ extern "C" void deviceConvolve(float *sourceImg, float *resultImg, int width, in
     checkCudaErrors(cudaFree(deviceResultImg));
 }
 
-__global__ void device_get_coadd_stamp(int num_images, int width, int height, float *image_vect,
-                                       perImageData image_data, int num_trajectories,
-                                       trajectory *trajectories, stampParameters params, int *use_index_vect,
-                                       float *results) {
-    // Get the trajectory that we are going to be using.
-    const int trj_index = blockIdx.x * blockDim.x + threadIdx.x;
-    if (trj_index < 0 || trj_index >= num_trajectories) return;
-    trajectory trj = trajectories[trj_index];
-
-    // Get the pixel coordinates within the stamp to use.
-    const int stamp_width = 2 * params.radius + 1;
-    const int stamp_x = threadIdx.y;
-    if (stamp_x < 0 || stamp_x >= stamp_width) return;
-
-    const int stamp_y = threadIdx.z;
-    if (stamp_y < 0 || stamp_y >= stamp_width) return;
-
-    // Compute the various offsets for the indices.
-    int use_index_offset = num_images * trj_index;
-    int trj_offset = trj_index * stamp_width * stamp_width;
-    int pixel_index = stamp_width * stamp_y + stamp_x;
-
-    // Allocate space for the coadd information.
-    assert(num_images < MAX_STAMP_IMAGES);
-    float values[MAX_STAMP_IMAGES];
-
-    // Loop over each image and compute the stamp.
-    int num_values = 0;
-    for (int t = 0; t < num_images; ++t) {
-        // Skip entries marked 0 in the use_index_vect.
-        if (use_index_vect != nullptr && use_index_vect[use_index_offset + t] == 0) {
-            continue;
-        }
-
-        // Predict the trajectory's position including the barycentric correction if needed.
-        float cTime = image_data.imageTimes[t];
-        int currentX = int(trj.x + trj.xVel * cTime);
-        int currentY = int(trj.y + trj.yVel * cTime);
-        if (image_data.baryCorrs != nullptr) {
-            baryCorrection bc = image_data.baryCorrs[t];
-            currentX = int(trj.x + trj.xVel * cTime + bc.dx + trj.x * bc.dxdx + trj.y * bc.dxdy);
-            currentY = int(trj.y + trj.yVel * cTime + bc.dy + trj.x * bc.dydx + trj.y * bc.dydy);
-        }
-
-        // Get the stamp and add it to the list of values.
-        int img_x = currentX - params.radius + stamp_x;
-        int img_y = currentY - params.radius + stamp_y;
-        if ((img_x >= 0) && (img_x < width) && (img_y >= 0) && (img_y < height)) {
-            int pixel_index = width * height * t + img_y * width + img_x;
-            if (image_vect[pixel_index] != NO_DATA) {
-                values[num_values] = image_vect[pixel_index];
-                ++num_values;
-            }
-        }
-    }
-
-    // If there are no values, just return 0.
-    if (num_values == 0) {
-        results[trj_offset + pixel_index] = 0.0;
-        return;
-    }
-
-    // Do the actual computation from the values.
-    float result = 0.0;
-    int median_ind = num_values / 2;  // Outside switch to avoid compiler warnings.
-    switch (params.stamp_type) {
-        case STAMP_MEDIAN:
-            // Sort the values in ascending order.
-            for (int j = 0; j < num_values; j++) {
-                for (int k = j + 1; k < num_values; k++) {
-                    if (values[j] > values[k]) {
-                        float tmp = values[j];
-                        values[j] = values[k];
-                        values[k] = tmp;
-                    }
-                }
-            }
-
-            // Take the median value of the pixels with data.
-            if (num_values % 2 == 0) {
-                result = (values[median_ind] + values[median_ind - 1]) / 2.0;
-            } else {
-                result = values[median_ind];
-            }
-            break;
-        case STAMP_SUM:
-            for (int t = 0; t < num_values; ++t) {
-                result += values[t];
-            }
-            break;
-        case STAMP_MEAN:
-            for (int t = 0; t < num_values; ++t) {
-                result += values[t];
-            }
-            result /= float(num_values);
-            break;
-    }
-
-    // Save the result to the correct pixel location.
-    results[trj_offset + pixel_index] = result;
-}
-
-void deviceGetCoadds(ImageStack &stack, perImageData image_data, int num_trajectories,
-                     trajectory *trajectories, stampParameters params,
-                     std::vector<std::vector<bool>> &use_index_vect, float *results) {
-    // Allocate Device memory
-    trajectory *device_trjs;
-    int *device_use_index = nullptr;
-    float *device_times;
-    float *device_img;
-    float *device_res;
-    baryCorrection *deviceBaryCorrs = nullptr;
-
-    // Compute the dimensions for the data.
-    const unsigned int num_images = stack.imgCount();
-    const unsigned int width = stack.getWidth();
-    const unsigned int height = stack.getHeight();
-    const unsigned int num_image_pixels = num_images * width * height;
-    const unsigned int stamp_width = 2 * params.radius + 1;
-    const unsigned int stamp_ppi = (2 * params.radius + 1) * (2 * params.radius + 1);
-    const unsigned int num_stamp_pixels = num_trajectories * stamp_ppi;
-
-    // Allocate and copy the trajectories.
-    checkCudaErrors(cudaMalloc((void **)&device_trjs, sizeof(trajectory) * num_trajectories));
-    checkCudaErrors(cudaMemcpy(device_trjs, trajectories, sizeof(trajectory) * num_trajectories,
-                               cudaMemcpyHostToDevice));
-
-    // Check if we need to create a vector of per-trajectory, per-image use.
-    // Convert the vector of booleans into an integer array so we do a cudaMemcpy.
-    if (use_index_vect.size() == num_trajectories) {
-        checkCudaErrors(cudaMalloc((void **)&device_use_index, sizeof(int) * num_images * num_trajectories));
-
-        int *start_ptr = device_use_index;
-        std::vector<int> int_vect(num_images, 0);
-        for (unsigned i = 0; i < num_trajectories; ++i) {
-            assert(use_index_vect[i].size() == num_images);
-            for (unsigned t = 0; t < num_images; ++t) {
-                int_vect[t] = use_index_vect[i][t] ? 1 : 0;
-            }
-
-            checkCudaErrors(
-                    cudaMemcpy(start_ptr, int_vect.data(), sizeof(int) * num_images, cudaMemcpyHostToDevice));
-            start_ptr += num_images;
-        }
-    }
-
-    // Allocate and copy the times.
-    checkCudaErrors(cudaMalloc((void **)&device_times, sizeof(float) * num_images));
-    checkCudaErrors(cudaMemcpy(device_times, image_data.imageTimes, sizeof(float) * num_images,
-                               cudaMemcpyHostToDevice));
-
-    // Allocate and copy the images.
-    checkCudaErrors(cudaMalloc((void **)&device_img, sizeof(float) * num_image_pixels));
-    float *next_ptr = device_img;
-    for (unsigned t = 0; t < num_images; ++t) {
-        const std::vector<float> &data_ref = stack.getSingleImage(t).getScience().getPixels();
-
-        assert(data_ref.size() == width * height);
-        checkCudaErrors(cudaMemcpy(next_ptr, data_ref.data(), sizeof(float) * width * height,
-                                   cudaMemcpyHostToDevice));
-        next_ptr += width * height;
-    }
-
-    // Allocate space for the results.
-    checkCudaErrors(cudaMalloc((void **)&device_res, sizeof(float) * num_stamp_pixels));
-
-    // Allocate memory for and copy barycentric corrections (if needed).
-    if (image_data.baryCorrs != nullptr) {
-        checkCudaErrors(cudaMalloc((void **)&deviceBaryCorrs, sizeof(baryCorrection) * num_images));
-        checkCudaErrors(cudaMemcpy(deviceBaryCorrs, image_data.baryCorrs, sizeof(baryCorrection) * num_images,
-                                   cudaMemcpyHostToDevice));
-    }
-
-    // Wrap the per-image data into a struct. This struct will be copied by value
-    // during the function call, so we don't need to allocate memory for the
-    // struct itself. We just set the pointers to the on device vectors.
-    perImageData device_image_data;
-    device_image_data.numImages = num_images;
-    device_image_data.imageTimes = device_times;
-    device_image_data.baryCorrs = deviceBaryCorrs;
-    device_image_data.psiParams = nullptr;
-    device_image_data.phiParams = nullptr;
-
-    dim3 blocks(num_trajectories, 1, 1);
-    dim3 threads(1, stamp_width, stamp_width);
-
-    // Create the stamps.
-    device_get_coadd_stamp<<<blocks, threads>>>(num_images, width, height, device_img, device_image_data,
-                                                num_trajectories, device_trjs, params, device_use_index,
-                                                device_res);
-    cudaDeviceSynchronize();
-
-    // Free up the unneeded memory (everything except for the on-device results).
-    if (deviceBaryCorrs != nullptr) checkCudaErrors(cudaFree(deviceBaryCorrs));
-    checkCudaErrors(cudaFree(device_img));
-    if (device_use_index != nullptr) checkCudaErrors(cudaFree(device_use_index));
-    checkCudaErrors(cudaFree(device_times));
-    checkCudaErrors(cudaFree(device_trjs));
-    cudaDeviceSynchronize();
-
-    // Read back results
-    checkCudaErrors(
-            cudaMemcpy(results, device_res, sizeof(float) * num_stamp_pixels, cudaMemcpyDeviceToHost));
-    cudaDeviceSynchronize();
-
-    // Free the rest of the  on GPU memory.
-    checkCudaErrors(cudaFree(device_res));
-}
-
 } /* namespace image_base */
 
 #endif /* IMAGE_KERNELS_CU_ */
diff --git a/src/kbmod/search/KBMOSearch.h b/src/kbmod/search/KBMOSearch.h
@@ -23,6 +23,7 @@
 #include "common.h"
 #include "ImageStack.h"
 #include "PointSpreadFunc.h"
+#include "search_structs.h"
 
 using image_base::ImageStack;
 using image_base::LayeredImage;

diff --git a/src/kbmod/search/bindings.cpp b/src/kbmod/search/bindings.cpp
@@ -15,6 +15,52 @@ using ks = search::KBMOSearch;
 using std::to_string;
 
 PYBIND11_MODULE(search, m) {
+    py::enum_<StampType>(m, "StampType")
+            .value("STAMP_SUM", StampType::STAMP_SUM)
+            .value("STAMP_MEAN", StampType::STAMP_MEAN)
+            .value("STAMP_MEDIAN", StampType::STAMP_MEDIAN)
+            .export_values();
+    py::class_<trajectory>(m, "trajectory", R"pbdoc(
+            A trajectory structure holding basic information about potential results.
+            )pbdoc")
+            .def(py::init<>())
+            .def_readwrite("x_v", &trajectory::xVel)
+            .def_readwrite("y_v", &trajectory::yVel)
+            .def_readwrite("lh", &trajectory::lh)
+            .def_readwrite("flux", &trajectory::flux)
+            .def_readwrite("x", &trajectory::x)
+            .def_readwrite("y", &trajectory::y)
+            .def_readwrite("obs_count", &trajectory::obsCount)
+            .def("__repr__",
+                 [](const trajectory &t) {
+                     return "lh: " + to_string(t.lh) + " flux: " + to_string(t.flux) +
+                            " x: " + to_string(t.x) + " y: " + to_string(t.y) + " x_v: " + to_string(t.xVel) +
+                            " y_v: " + to_string(t.yVel) + " obs_count: " + to_string(t.obsCount);
+                 })
+            .def(py::pickle(
+                    [](const trajectory &p) {  // __getstate__
+                        return py::make_tuple(p.xVel, p.yVel, p.lh, p.flux, p.x, p.y, p.obsCount);
+                    },
+                    [](py::tuple t) {  // __setstate__
+                        if (t.size() != 7) throw std::runtime_error("Invalid state!");
+                        trajectory trj = {t[0].cast<float>(), t[1].cast<float>(), t[2].cast<float>(),
+                                          t[3].cast<float>(), t[4].cast<short>(), t[5].cast<short>(),
+                                          t[6].cast<short>()};
+                        return trj;
+                    }));
+    py::class_<stampParameters>(m, "stamp_parameters")
+            .def(py::init<>())
+            .def_readwrite("radius", &stampParameters::radius)
+            .def_readwrite("stamp_type", &stampParameters::stamp_type)
+            .def_readwrite("do_filtering", &stampParameters::do_filtering)
+            .def_readwrite("center_thresh", &stampParameters::center_thresh)
+            .def_readwrite("peak_offset_x", &stampParameters::peak_offset_x)
+            .def_readwrite("peak_offset_y", &stampParameters::peak_offset_y)
+            .def_readwrite("m01", &stampParameters::m01_limit)
+            .def_readwrite("m10", &stampParameters::m10_limit)
+            .def_readwrite("m11", &stampParameters::m11_limit)
+            .def_readwrite("m02", &stampParameters::m02_limit)
+            .def_readwrite("m20", &stampParameters::m20_limit);
     py::class_<ks>(m, "stack_search")
             .def(py::init<is &>())
             .def("save_psi_phi", &ks::savePsiPhi)