Skip to content

Commit

Permalink
Merge branch 'main' into runner_refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
jeremykubica committed Feb 2, 2024
2 parents 0b2b941 + 302daeb commit 3e775ad
Showing 1 changed file with 82 additions and 165 deletions.
247 changes: 82 additions & 165 deletions src/kbmod/search/kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ extern "C" void device_free_search_data_arrays(SearchData *data) {
}
}

__forceinline__ __device__ PsiPhi read_encoded_psi_phi(SearchDataMeta &params, void *psi_phi_vect, int time,
__host__ __device__ PsiPhi read_encoded_psi_phi(SearchDataMeta &params, void *psi_phi_vect, int time,
int row, int col) {
// Bounds checking.
if ((row < 0) || (col < 0) || (row >= params.height) || (col >= params.width)) {
Expand Down Expand Up @@ -139,6 +139,81 @@ extern "C" __device__ __host__ void SigmaGFilteredIndicesCU(float *values, int n
*max_keep_idx = end - 1;
}

/*
* Evaluate the likelihood score (as computed with from the psi and phi values) for a single
* given candidate trajectory. Modifies the trajectory in place to update the number of
* observations, likelihood, and flux.
*/
extern "C" __device__ __host__ void evaluateTrajectory(SearchDataMeta psi_phi_meta, void *psi_phi_vect, float *image_times,
SearchParameters params, Trajectory *candidate) {
// Data structures used for filtering. We fill in only what we need.
float psi_array[MAX_NUM_IMAGES];
float phi_array[MAX_NUM_IMAGES];
float psi_sum = 0.0;
float phi_sum = 0.0;

// Reset the statistics for the candidate.
candidate->obs_count = 0;
candidate->lh = -1.0;
candidate->flux = -1.0;

// Loop over each image and sample the appropriate pixel
int num_seen = 0;
for (int i = 0; i < psi_phi_meta.num_times; ++i) {
// Predict the trajectory's position.
float curr_time = image_times[i];
int current_x = candidate->x + int(candidate->vx * curr_time + 0.5);
int current_y = candidate->y + int(candidate->vy * curr_time + 0.5);

// Get the Psi and Phi pixel values.
PsiPhi pixel_vals = read_encoded_psi_phi(psi_phi_meta, psi_phi_vect, i, current_y, current_x);
if (pixel_vals.psi != NO_DATA && pixel_vals.phi != NO_DATA) {
psi_sum += pixel_vals.psi;
phi_sum += pixel_vals.phi;
psi_array[num_seen] = pixel_vals.psi;
phi_array[num_seen] = pixel_vals.phi;
num_seen += 1;
}
}
candidate->obs_count = num_seen;
candidate->lh = psi_sum / sqrt(phi_sum);
candidate->flux = psi_sum / phi_sum;

// If we do not have enough observations or a good enough LH score,
// do not bother with any of the following steps.
if ((candidate->obs_count < params.min_observations) ||
(params.do_sigmag_filter && candidate->lh < params.min_lh))
return;

// If we are doing on GPU filtering, run the sigma_g filter and recompute the likelihoods.
if (params.do_sigmag_filter) {
// Fill in a likelihood and index array for sorting.
float lc_array[MAX_NUM_IMAGES];
int idx_array[MAX_NUM_IMAGES];
for (int i = 0; i < num_seen; ++i) {
lc_array[i] = (phi_array[i] != 0) ? (psi_array[i] / phi_array[i]) : 0;
idx_array[i] = i;
}

int min_keep_idx = 0;
int max_keep_idx = num_seen - 1;
SigmaGFilteredIndicesCU(lc_array, num_seen, params.sgl_L, params.sgl_H, params.sigmag_coeff, 2.0,
idx_array, &min_keep_idx, &max_keep_idx);

// Compute the likelihood and flux of the track based on the filtered
// observations (ones in [min_keep_idx, max_keep_idx]).
float new_psi_sum = 0.0;
float new_phi_sum = 0.0;
for (int i = min_keep_idx; i <= max_keep_idx; i++) {
int idx = idx_array[i];
new_psi_sum += psi_array[idx];
new_phi_sum += phi_array[idx];
}
candidate->lh = new_psi_sum / sqrt(new_phi_sum);
candidate->flux = new_psi_sum / new_phi_sum;
}
}

/*
* Searches through images (represented as a flat array of floats) looking for most likely
* trajectories in the given list. Outputs a results image of best trajectories. Returns a
Expand All @@ -165,12 +240,6 @@ __global__ void searchFilterImages(SearchDataMeta psi_phi_meta, void *psi_phi_ve
const int x = x_i + params.x_start_min;
const int y = y_i + params.y_start_min;

// Data structures used for filtering.
float lc_array[MAX_NUM_IMAGES];
float psi_array[MAX_NUM_IMAGES];
float phi_array[MAX_NUM_IMAGES];
int idx_array[MAX_NUM_IMAGES];

// Create an initial set of best results with likelihood -1.0.
// We also set (x, y) because they are used in the later python
// functions.
Expand All @@ -191,67 +260,15 @@ __global__ void searchFilterImages(SearchDataMeta psi_phi_meta, void *psi_phi_ve
curr_trj.vy = trajectories[t].vy;
curr_trj.obs_count = 0;

float psi_sum = 0.0;
float phi_sum = 0.0;

// Loop over each image and sample the appropriate pixel
for (int i = 0; i < psi_phi_meta.num_times; ++i) {
lc_array[i] = 0;
psi_array[i] = 0;
phi_array[i] = 0;
idx_array[i] = i;
}

// Loop over each image and sample the appropriate pixel
int num_seen = 0;
for (int i = 0; i < psi_phi_meta.num_times; ++i) {
// Predict the trajectory's position.
float curr_time = image_times[i];
int current_x = x + int(curr_trj.vx * curr_time + 0.5);
int current_y = y + int(curr_trj.vy * curr_time + 0.5);

// Get the Psi and Phi pixel values.
PsiPhi pixel_vals = read_encoded_psi_phi(psi_phi_meta, psi_phi_vect, i, current_y, current_x);
if (pixel_vals.psi != NO_DATA && pixel_vals.phi != NO_DATA) {
curr_trj.obs_count++;
psi_sum += pixel_vals.psi;
phi_sum += pixel_vals.phi;
psi_array[num_seen] = pixel_vals.psi;
phi_array[num_seen] = pixel_vals.phi;
if (pixel_vals.phi != 0.0) lc_array[num_seen] = pixel_vals.psi / pixel_vals.phi;
num_seen += 1;
}
}
curr_trj.lh = psi_sum / sqrt(phi_sum);
curr_trj.flux = psi_sum / phi_sum;
// Evaluate the trajectory.
evaluateTrajectory(psi_phi_meta, psi_phi_vect, image_times, params, &curr_trj);

// If we do not have enough observations or a good enough LH score,
// do not bother with any of the following steps.
// do not bother inserting it into the sorted list of results.
if ((curr_trj.obs_count < params.min_observations) ||
(params.do_sigmag_filter && curr_trj.lh < params.min_lh))
continue;

// If we are doing on GPU filtering, run the sigma_g filter
// and recompute the likelihoods.
if (params.do_sigmag_filter) {
int min_keep_idx = 0;
int max_keep_idx = num_seen - 1;
SigmaGFilteredIndicesCU(lc_array, num_seen, params.sgl_L, params.sgl_H, params.sigmag_coeff, 2.0,
idx_array, &min_keep_idx, &max_keep_idx);

// Compute the likelihood and flux of the track based on the filtered
// observations (ones in [min_keep_idx, max_keep_idx]).
float new_psi_sum = 0.0;
float new_phi_sum = 0.0;
for (int i = min_keep_idx; i <= max_keep_idx; i++) {
int idx = idx_array[i];
new_psi_sum += psi_array[idx];
new_phi_sum += phi_array[idx];
}
curr_trj.lh = new_psi_sum / sqrt(new_phi_sum);
curr_trj.flux = new_psi_sum / new_phi_sum;
}

// Insert the new trajectory into the sorted list of results.
// Only sort the values with valid likelihoods.
Trajectory temp;
Expand Down Expand Up @@ -320,8 +337,9 @@ extern "C" void deviceSearchFilter(SearchData &search_data, SearchParameters par

// Launch Search
searchFilterImages<<<blocks, threads>>>(search_data.get_meta_data(), search_data.get_gpu_array_ptr(),
static_cast<float *>(search_data.get_gpu_time_array_ptr()),
params, num_trajectories, device_tests, device_search_results);
device_img_times, params, num_trajectories, device_tests,
device_search_results);
cudaDeviceSynchronize();

// Read back results
checkCudaErrors(cudaMemcpy(best_results, device_search_results, sizeof(Trajectory) * num_results,
Expand Down Expand Up @@ -512,107 +530,6 @@ void deviceGetCoadds(const unsigned int num_images, const unsigned int width, co
checkCudaErrors(cudaFree(device_res));
}

/*
void deviceGetCoadds(ImageStack &stack, PerImageData image_data, int num_trajectories,
Trajectory *trajectories, StampParameters params,
std::vector<std::vector<bool>> &use_index_vect, float *results) {
// Allocate Device memory
Trajectory *device_trjs;
int *device_use_index = nullptr;
float *device_times;
float *device_img;
float *device_res;
// Compute the dimensions for the data.
const unsigned int num_images = stack.img_count();
const unsigned int width = stack.get_width();
const unsigned int height = stack.get_height();
const unsigned int num_image_pixels = num_images * width * height;
const unsigned int stamp_width = 2 * params.radius + 1;
const unsigned int stamp_ppi = (2 * params.radius + 1) * (2 * params.radius + 1);
const unsigned int num_stamp_pixels = num_trajectories * stamp_ppi;
// Allocate and copy the trajectories.
checkCudaErrors(cudaMalloc((void **)&device_trjs, sizeof(Trajectory) * num_trajectories));
checkCudaErrors(cudaMemcpy(device_trjs, trajectories, sizeof(Trajectory) * num_trajectories,
cudaMemcpyHostToDevice));
// Check if we need to create a vector of per-trajectory, per-image use.
// Convert the vector of booleans into an integer array so we do a cudaMemcpy.
if (use_index_vect.size() == num_trajectories) {
checkCudaErrors(cudaMalloc((void **)&device_use_index, sizeof(int) * num_images * num_trajectories));
int *start_ptr = device_use_index;
std::vector<int> int_vect(num_images, 0);
for (unsigned i = 0; i < num_trajectories; ++i) {
assert(use_index_vect[i].size() == num_images);
for (unsigned t = 0; t < num_images; ++t) {
int_vect[t] = use_index_vect[i][t] ? 1 : 0;
}
checkCudaErrors(
cudaMemcpy(start_ptr, int_vect.data(), sizeof(int) * num_images, cudaMemcpyHostToDevice));
start_ptr += num_images;
}
}
// Allocate and copy the times.
checkCudaErrors(cudaMalloc((void **)&device_times, sizeof(float) * num_images));
checkCudaErrors(cudaMemcpy(device_times, image_data.image_times, sizeof(float) * num_images,
cudaMemcpyHostToDevice));
// Allocate and copy the images.
checkCudaErrors(cudaMalloc((void **)&device_img, sizeof(float) * num_image_pixels));
float *next_ptr = device_img;
for (unsigned t = 0; t < num_images; ++t) {
// Used to be a vector of floats, now is an eigen::vector of floats or something
// but that's ok because all we use it for is the .data() -> float*
// I think this can also just directly go to .data because of RowMajor layout
auto& data_ref = stack.get_single_image(t).get_science().get_image();
assert(data_ref.size() == width * height);
checkCudaErrors(cudaMemcpy(next_ptr, data_ref.data(), sizeof(float) * width * height,
cudaMemcpyHostToDevice));
next_ptr += width * height;
}
// Allocate space for the results.
checkCudaErrors(cudaMalloc((void **)&device_res, sizeof(float) * num_stamp_pixels));
// Wrap the per-image data into a struct. This struct will be copied by value
// during the function call, so we don't need to allocate memory for the
// struct itself. We just set the pointers to the on device vectors.
PerImageData device_image_data;
device_image_data.num_images = num_images;
device_image_data.image_times = device_times;
device_image_data.psi_params = nullptr;
device_image_data.phi_params = nullptr;
dim3 blocks(num_trajectories, 1, 1);
dim3 threads(1, stamp_width, stamp_width);
// Create the stamps.
deviceGetCoaddStamp<<<blocks, threads>>>(num_images, width, height, device_img, device_image_data,
num_trajectories, device_trjs, params, device_use_index,
device_res);
cudaDeviceSynchronize();
// Free up the unneeded memory (everything except for the on-device results).
checkCudaErrors(cudaFree(device_img));
if (device_use_index != nullptr) checkCudaErrors(cudaFree(device_use_index));
checkCudaErrors(cudaFree(device_times));
checkCudaErrors(cudaFree(device_trjs));
cudaDeviceSynchronize();
// Read back results
checkCudaErrors(
cudaMemcpy(results, device_res, sizeof(float) * num_stamp_pixels, cudaMemcpyDeviceToHost));
cudaDeviceSynchronize();
// Free the rest of the on GPU memory.
checkCudaErrors(cudaFree(device_res));
}
*/
} /* namespace search */

#endif /* KERNELS_CU_ */

0 comments on commit 3e775ad

Please sign in to comment.