Break out core evaluation into its own function #446

Merged
1 commit merged on Feb 2, 2024
src/kbmod/search/kernels.cu: 243 changes (80 additions, 163 deletions)
@@ -41,7 +41,7 @@ extern "C" void device_free_psi_phi_array(PsiPhiArray *data) {
}
}

__forceinline__ __device__ PsiPhi read_encoded_psi_phi(PsiPhiArrayMeta &params, void *psi_phi_vect, int time,
__host__ __device__ PsiPhi read_encoded_psi_phi(PsiPhiArrayMeta &params, void *psi_phi_vect, int time,
int row, int col) {
// Bounds checking.
if ((row < 0) || (col < 0) || (row >= params.height) || (col >= params.width)) {
@@ -122,6 +122,81 @@ extern "C" __device__ __host__ void SigmaGFilteredIndicesCU(float *values, int n
*max_keep_idx = end - 1;
}

/*
* Evaluate the likelihood score (as computed from the psi and phi values) for a single
* given candidate trajectory. Modifies the trajectory in place to update the number of
* observations, likelihood, and flux.
*/
extern "C" __device__ __host__ void evaluateTrajectory(PsiPhiArrayMeta psi_phi_meta, void *psi_phi_vect, float *image_times,
SearchParameters params, Trajectory *candidate) {
// Data structures used for filtering. We fill in only what we need.
float psi_array[MAX_NUM_IMAGES];
float phi_array[MAX_NUM_IMAGES];
float psi_sum = 0.0;
float phi_sum = 0.0;

// Reset the statistics for the candidate.
candidate->obs_count = 0;
candidate->lh = -1.0;
candidate->flux = -1.0;

// Loop over each image and sample the appropriate pixel
int num_seen = 0;
for (int i = 0; i < psi_phi_meta.num_times; ++i) {
// Predict the trajectory's position.
float curr_time = image_times[i];
int current_x = candidate->x + int(candidate->vx * curr_time + 0.5);
int current_y = candidate->y + int(candidate->vy * curr_time + 0.5);

// Get the Psi and Phi pixel values.
PsiPhi pixel_vals = read_encoded_psi_phi(psi_phi_meta, psi_phi_vect, i, current_y, current_x);
if (pixel_vals.psi != NO_DATA && pixel_vals.phi != NO_DATA) {
psi_sum += pixel_vals.psi;
phi_sum += pixel_vals.phi;
psi_array[num_seen] = pixel_vals.psi;
phi_array[num_seen] = pixel_vals.phi;
num_seen += 1;
}
}
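// Record how many valid (unmasked) observations the trajectory hit and compute the
// unfiltered statistics: lh = sum(psi) / sqrt(sum(phi)) and flux = sum(psi) / sum(phi).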
candidate->obs_count = num_seen;
candidate->lh = psi_sum / sqrt(phi_sum);
candidate->flux = psi_sum / phi_sum;

// If we do not have enough observations or a good enough LH score,
// do not bother with any of the following steps.
if ((candidate->obs_count < params.min_observations) ||
(params.do_sigmag_filter && candidate->lh < params.min_lh))
return;

// If we are doing on-GPU filtering, run the sigma_g filter and recompute the likelihoods.
if (params.do_sigmag_filter) {
// Fill in a likelihood and index array for sorting.
float lc_array[MAX_NUM_IMAGES];
int idx_array[MAX_NUM_IMAGES];
for (int i = 0; i < num_seen; ++i) {
lc_array[i] = (phi_array[i] != 0) ? (psi_array[i] / phi_array[i]) : 0;
idx_array[i] = i;
}

int min_keep_idx = 0;
int max_keep_idx = num_seen - 1;
SigmaGFilteredIndicesCU(lc_array, num_seen, params.sgl_L, params.sgl_H, params.sigmag_coeff, 2.0,
idx_array, &min_keep_idx, &max_keep_idx);
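// After filtering, idx_array[min_keep_idx .. max_keep_idx] indexes the observations
// that survive the sigma-g clipping of the per-observation likelihood values.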

// Compute the likelihood and flux of the track based on the filtered
// observations (ones in [min_keep_idx, max_keep_idx]).
float new_psi_sum = 0.0;
float new_phi_sum = 0.0;
for (int i = min_keep_idx; i <= max_keep_idx; i++) {
int idx = idx_array[i];
new_psi_sum += psi_array[idx];
new_phi_sum += phi_array[idx];
}
candidate->lh = new_psi_sum / sqrt(new_phi_sum);
candidate->flux = new_psi_sum / new_phi_sum;
}
}
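
Because evaluateTrajectory is declared both __device__ and __host__, the same scoring
code can in principle be called from host code as well as from the searchFilterImages
kernel below. A minimal host-side sketch, assuming the psi/phi data and image times are
also available in host memory; psi_phi_cpu_ptr and the example field values are
illustrative assumptions, not part of this change:

    // Sketch only: psi_phi_cpu_ptr stands in for a CPU-resident psi/phi buffer
    // laid out the way read_encoded_psi_phi expects.
    Trajectory candidate;
    candidate.x = 120;     // starting column (pixels)
    candidate.y = 85;      // starting row (pixels)
    candidate.vx = 10.0f;  // x velocity (pixels per unit of image_times)
    candidate.vy = -2.5f;  // y velocity (pixels per unit of image_times)
    evaluateTrajectory(psi_phi_array.get_meta_data(), psi_phi_cpu_ptr, image_times,
                       params, &candidate);
    // candidate.obs_count, candidate.lh, and candidate.flux now hold the
    // (optionally sigma-g filtered) statistics for this single candidate.

Factoring the per-trajectory work out of the kernel keeps searchFilterImages focused on
iterating over candidates and maintaining the sorted result list.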

/*
* Searches through images (represented as a flat array of floats) looking for most likely
* trajectories in the given list. Outputs a results image of best trajectories. Returns a
@@ -148,12 +223,6 @@ __global__ void searchFilterImages(PsiPhiArrayMeta psi_phi_meta, void *psi_phi_v
const int x = x_i + params.x_start_min;
const int y = y_i + params.y_start_min;

// Data structures used for filtering.
float lc_array[MAX_NUM_IMAGES];
float psi_array[MAX_NUM_IMAGES];
float phi_array[MAX_NUM_IMAGES];
int idx_array[MAX_NUM_IMAGES];

// Create an initial set of best results with likelihood -1.0.
// We also set (x, y) because they are used in the later python
// functions.
Expand All @@ -174,67 +243,15 @@ __global__ void searchFilterImages(PsiPhiArrayMeta psi_phi_meta, void *psi_phi_v
curr_trj.vy = trajectories[t].vy;
curr_trj.obs_count = 0;

float psi_sum = 0.0;
float phi_sum = 0.0;

// Loop over each image and sample the appropriate pixel
for (int i = 0; i < psi_phi_meta.num_times; ++i) {
lc_array[i] = 0;
psi_array[i] = 0;
phi_array[i] = 0;
idx_array[i] = i;
}

// Loop over each image and sample the appropriate pixel
int num_seen = 0;
for (int i = 0; i < psi_phi_meta.num_times; ++i) {
// Predict the trajectory's position.
float curr_time = image_times[i];
int current_x = x + int(curr_trj.vx * curr_time + 0.5);
int current_y = y + int(curr_trj.vy * curr_time + 0.5);

// Get the Psi and Phi pixel values.
PsiPhi pixel_vals = read_encoded_psi_phi(psi_phi_meta, psi_phi_vect, i, current_y, current_x);
if (pixel_vals.psi != NO_DATA && pixel_vals.phi != NO_DATA) {
curr_trj.obs_count++;
psi_sum += pixel_vals.psi;
phi_sum += pixel_vals.phi;
psi_array[num_seen] = pixel_vals.psi;
phi_array[num_seen] = pixel_vals.phi;
if (pixel_vals.phi != 0.0) lc_array[num_seen] = pixel_vals.psi / pixel_vals.phi;
num_seen += 1;
}
}
curr_trj.lh = psi_sum / sqrt(phi_sum);
curr_trj.flux = psi_sum / phi_sum;
// Evaluate the trajectory.
evaluateTrajectory(psi_phi_meta, psi_phi_vect, image_times, params, &curr_trj);

// If we do not have enough observations or a good enough LH score,
// do not bother with any of the following steps.
// do not bother inserting it into the sorted list of results.
if ((curr_trj.obs_count < params.min_observations) ||
(params.do_sigmag_filter && curr_trj.lh < params.min_lh))
continue;

// If we are doing on GPU filtering, run the sigma_g filter
// and recompute the likelihoods.
if (params.do_sigmag_filter) {
int min_keep_idx = 0;
int max_keep_idx = num_seen - 1;
SigmaGFilteredIndicesCU(lc_array, num_seen, params.sgl_L, params.sgl_H, params.sigmag_coeff, 2.0,
idx_array, &min_keep_idx, &max_keep_idx);

// Compute the likelihood and flux of the track based on the filtered
// observations (ones in [min_keep_idx, max_keep_idx]).
float new_psi_sum = 0.0;
float new_phi_sum = 0.0;
for (int i = min_keep_idx; i <= max_keep_idx; i++) {
int idx = idx_array[i];
new_psi_sum += psi_array[idx];
new_phi_sum += phi_array[idx];
}
curr_trj.lh = new_psi_sum / sqrt(new_phi_sum);
curr_trj.flux = new_psi_sum / new_phi_sum;
}

// Insert the new trajectory into the sorted list of results.
// Only sort the values with valid likelihoods.
Trajectory temp;
@@ -311,6 +328,7 @@ extern "C" void deviceSearchFilter(PsiPhiArray &psi_phi_array, float *image_time
searchFilterImages<<<blocks, threads>>>(psi_phi_array.get_meta_data(), psi_phi_array.get_gpu_array_ptr(),
device_img_times, params, num_trajectories, device_tests,
device_search_results);
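// Block until the search kernel has finished writing device_search_results
// before copying the results back to the host.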
cudaDeviceSynchronize();

// Read back results
checkCudaErrors(cudaMemcpy(best_results, device_search_results, sizeof(Trajectory) * num_results,
@@ -502,107 +520,6 @@ void deviceGetCoadds(const unsigned int num_images, const unsigned int width, co
checkCudaErrors(cudaFree(device_res));
}

/*
void deviceGetCoadds(ImageStack &stack, PerImageData image_data, int num_trajectories,
Trajectory *trajectories, StampParameters params,
std::vector<std::vector<bool>> &use_index_vect, float *results) {
// Allocate Device memory
Trajectory *device_trjs;
int *device_use_index = nullptr;
float *device_times;
float *device_img;
float *device_res;

// Compute the dimensions for the data.
const unsigned int num_images = stack.img_count();
const unsigned int width = stack.get_width();
const unsigned int height = stack.get_height();
const unsigned int num_image_pixels = num_images * width * height;
const unsigned int stamp_width = 2 * params.radius + 1;
const unsigned int stamp_ppi = (2 * params.radius + 1) * (2 * params.radius + 1);
const unsigned int num_stamp_pixels = num_trajectories * stamp_ppi;

// Allocate and copy the trajectories.
checkCudaErrors(cudaMalloc((void **)&device_trjs, sizeof(Trajectory) * num_trajectories));
checkCudaErrors(cudaMemcpy(device_trjs, trajectories, sizeof(Trajectory) * num_trajectories,
cudaMemcpyHostToDevice));

// Check if we need to create a vector of per-trajectory, per-image use.
// Convert the vector of booleans into an integer array so we do a cudaMemcpy.
if (use_index_vect.size() == num_trajectories) {
checkCudaErrors(cudaMalloc((void **)&device_use_index, sizeof(int) * num_images * num_trajectories));

int *start_ptr = device_use_index;
std::vector<int> int_vect(num_images, 0);
for (unsigned i = 0; i < num_trajectories; ++i) {
assert(use_index_vect[i].size() == num_images);
for (unsigned t = 0; t < num_images; ++t) {
int_vect[t] = use_index_vect[i][t] ? 1 : 0;
}

checkCudaErrors(
cudaMemcpy(start_ptr, int_vect.data(), sizeof(int) * num_images, cudaMemcpyHostToDevice));
start_ptr += num_images;
}
}

// Allocate and copy the times.
checkCudaErrors(cudaMalloc((void **)&device_times, sizeof(float) * num_images));
checkCudaErrors(cudaMemcpy(device_times, image_data.image_times, sizeof(float) * num_images,
cudaMemcpyHostToDevice));

// Allocate and copy the images.
checkCudaErrors(cudaMalloc((void **)&device_img, sizeof(float) * num_image_pixels));
float *next_ptr = device_img;
for (unsigned t = 0; t < num_images; ++t) {
// Used to be a vector of floats, now is an eigen::vector of floats or something
// but that's ok because all we use it for is the .data() -> float*
// I think this can also just directly go to .data because of RowMajor layout
auto& data_ref = stack.get_single_image(t).get_science().get_image();

assert(data_ref.size() == width * height);
checkCudaErrors(cudaMemcpy(next_ptr, data_ref.data(), sizeof(float) * width * height,
cudaMemcpyHostToDevice));
next_ptr += width * height;
}

// Allocate space for the results.
checkCudaErrors(cudaMalloc((void **)&device_res, sizeof(float) * num_stamp_pixels));

// Wrap the per-image data into a struct. This struct will be copied by value
// during the function call, so we don't need to allocate memory for the
// struct itself. We just set the pointers to the on device vectors.
PerImageData device_image_data;
device_image_data.num_images = num_images;
device_image_data.image_times = device_times;
device_image_data.psi_params = nullptr;
device_image_data.phi_params = nullptr;

dim3 blocks(num_trajectories, 1, 1);
dim3 threads(1, stamp_width, stamp_width);

// Create the stamps.
deviceGetCoaddStamp<<<blocks, threads>>>(num_images, width, height, device_img, device_image_data,
num_trajectories, device_trjs, params, device_use_index,
device_res);
cudaDeviceSynchronize();

// Free up the unneeded memory (everything except for the on-device results).
checkCudaErrors(cudaFree(device_img));
if (device_use_index != nullptr) checkCudaErrors(cudaFree(device_use_index));
checkCudaErrors(cudaFree(device_times));
checkCudaErrors(cudaFree(device_trjs));
cudaDeviceSynchronize();

// Read back results
checkCudaErrors(
cudaMemcpy(results, device_res, sizeof(float) * num_stamp_pixels, cudaMemcpyDeviceToHost));
cudaDeviceSynchronize();

// Free the rest of the on GPU memory.
checkCudaErrors(cudaFree(device_res));
}
*/
} /* namespace search */

#endif /* KERNELS_CU_ */