Skip to content

Commit

Permalink
Minor speedup due to reusing calculations instead of repeating them
Browse files Browse the repository at this point in the history
  • Loading branch information
KSkwarczynski committed Feb 8, 2025
1 parent 96d89d2 commit 9c24fe7
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 45 deletions.
4 changes: 2 additions & 2 deletions covariance/covarianceBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -667,8 +667,8 @@ double covarianceBase::CalcLikelihood() const _noexcept_ {
for (int j = 0; j <= i; ++j) {
if (!_fFlatPrior[i] && !_fFlatPrior[j]) {
//KS: Since the matrix is symmetric we can calculate non-diagonal elements only once and count them twice, which can bring up to a factor of 2 speed-up.
short int scale = (i != j) ? 2 : 1;
logL += scale * 0.5*(_fPropVal[i] - _fPreFitValue[i])*(_fPropVal[j] - _fPreFitValue[j])*InvertCovMatrix[i][j];
double scale = (i != j) ? 1. : 0.5;
logL += scale * (_fPropVal[i] - _fPreFitValue[i])*(_fPropVal[j] - _fPreFitValue[j])*InvertCovMatrix[i][j];
}
}
}
Expand Down
30 changes: 15 additions & 15 deletions splines/SplineMonolith.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,7 @@ void SMonolith::LoadSplineFile(std::string FileName) {
FileName.insert(0, std::string(std::getenv("MACH3"))+"/");
}

TFile *SplineFile = new TFile(FileName.c_str(), "OPEN");
auto SplineFile = std::make_unique<TFile>(FileName.c_str(), "OPEN");
TTree *Settings = SplineFile->Get<TTree>("Settings");
TTree *Monolith = SplineFile->Get<TTree>("Monolith");
TTree *Monolith_TF1 = SplineFile->Get<TTree>("Monolith_TF1");
Expand Down Expand Up @@ -655,7 +655,6 @@ void SMonolith::LoadSplineFile(std::string FileName) {
}

SplineFile->Close();
delete SplineFile;

// Print some info; could probably make this to a separate function
PrintInitialsiation();
Expand All @@ -672,7 +671,7 @@ void SMonolith::PrepareSplineFile() {
FileName.insert(0, std::string(std::getenv("MACH3"))+"/");
}

TFile *SplineFile = new TFile(FileName.c_str(), "recreate");
auto SplineFile = std::make_unique<TFile>(FileName.c_str(), "recreate");
TTree *Settings = new TTree("Settings", "Settings");
TTree *Monolith = new TTree("Monolith", "Monolith");
TTree *Monolith_TF1 = new TTree("Monolith_TF1", "Monolith_TF1");
Expand Down Expand Up @@ -791,7 +790,6 @@ void SMonolith::PrepareSplineFile() {
delete EventInfo;
delete FastSplineInfoTree;
SplineFile->Close();
delete SplineFile;
}

// *****************************************
Expand Down Expand Up @@ -929,7 +927,7 @@ void SMonolith::FindSplineSegment() {
for (M3::int_t i = 0; i < nParams; ++i)
{
const M3::int_t nPoints = SplineInfoArray[i].nPts;
const M3::float_t* xArray = SplineInfoArray[i].xPts;
const M3::float_t* _restrict_ xArray = SplineInfoArray[i].xPts;

// Get the variation for this reconfigure for the ith parameter
const float xvar = float(*SplineInfoArray[i].splineParsPointer);
Expand Down Expand Up @@ -1029,9 +1027,9 @@ void SMonolith::CalcSplineWeights() {
// We've read the segment straight from CPU and is saved in segment_gpu
// polynomial parameters from the monolithic splineMonolith
const float fY = cpu_spline_handler->coeff_many[CurrentKnotPos];
const float fB = cpu_spline_handler->coeff_many[CurrentKnotPos+1];
const float fC = cpu_spline_handler->coeff_many[CurrentKnotPos+2];
const float fD = cpu_spline_handler->coeff_many[CurrentKnotPos+3];
const float fB = cpu_spline_handler->coeff_many[CurrentKnotPos + 1];
const float fC = cpu_spline_handler->coeff_many[CurrentKnotPos + 2];
const float fD = cpu_spline_handler->coeff_many[CurrentKnotPos + 3];
// This is the variation itself (needed to evaluate variation - stored spline point = dx)
const float dx = ParamValues[Param] - cpu_spline_handler->coeff_x[segment_X];

Expand All @@ -1050,12 +1048,12 @@ void SMonolith::CalcSplineWeights() {
const float x = ParamValues[cpu_paramNo_TF1_arr[tf1Num]];

// Read the coefficients
const float a = cpu_coeff_TF1_many[tf1Num*_nTF1Coeff_];
const float b = cpu_coeff_TF1_many[tf1Num*_nTF1Coeff_+1];
const unsigned int TF1_Index = tf1Num * _nTF1Coeff_;
const float a = cpu_coeff_TF1_many[TF1_Index];
const float b = cpu_coeff_TF1_many[TF1_Index + 1];

cpu_weights_tf1_var[tf1Num] = fmaf(a, x, b);
// cpu_weights_tf1_var[tf1Num] = a*x + b;

//cpu_weights_tf1_var[splineNum] = 1 + a*x + b*x*x + c*x*x*x + d*x*x*x*x + e*x*x*x*x*x;
}
#ifdef MULTITHREAD
Expand All @@ -1076,9 +1074,11 @@ void SMonolith::ModifyWeights(){
{
float totalWeight = 1.0f; // Initialize total weight for each event

const unsigned int Offset = 2 * EventNum;

// Extract the parameters for the current event
const unsigned int startIndex = cpu_nParamPerEvent[2 * EventNum + 1];
const unsigned int numParams = cpu_nParamPerEvent[2 * EventNum];
const unsigned int startIndex = cpu_nParamPerEvent[Offset + 1];
const unsigned int numParams = cpu_nParamPerEvent[Offset];

// Compute total weight for the current event
#ifdef MULTITHREAD
Expand All @@ -1089,8 +1089,8 @@ void SMonolith::ModifyWeights(){
}
//Now TF1
// Extract the parameters for the current event
const unsigned int startIndex_tf1 = cpu_nParamPerEvent_tf1[2 * EventNum + 1];
const unsigned int numParams_tf1 = cpu_nParamPerEvent_tf1[2 * EventNum];
const unsigned int startIndex_tf1 = cpu_nParamPerEvent_tf1[Offset + 1];
const unsigned int numParams_tf1 = cpu_nParamPerEvent_tf1[Offset];

// Compute total weight for the current event
#ifdef MULTITHREAD
Expand Down
39 changes: 11 additions & 28 deletions splines/gpuSplineUtils.cu
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ SMonolithGPU::~SMonolithGPU(){
// *******************************************
// Initialiser when using the x array and combined y,b,c,d array
__host__ void SMonolithGPU::InitGPU_SplineMonolith(
// *******************************************
#ifndef Weight_On_SplineBySpline_Basis
float **cpu_total_weights,
int n_events,
Expand All @@ -113,7 +112,7 @@ __host__ void SMonolithGPU::InitGPU_SplineMonolith(
unsigned int n_splines,
unsigned int n_tf1,
int Eve_size) {

// *******************************************
// Allocate chunks of memory to GPU
cudaMalloc((void **) &gpu_paramNo_arr, n_splines*sizeof(short int));
CudaCheckError();
Expand Down Expand Up @@ -197,7 +196,6 @@ __host__ void SMonolithGPU::InitGPU_Vals(float **vals) {
// ******************************************************
// Copy to GPU for x array and separate ybcd array
__host__ void SMonolithGPU::CopyToGPU_SplineMonolith(
// ******************************************************
SplineMonoStruct* cpu_spline_handler,

// TF1 related now
Expand All @@ -214,6 +212,7 @@ __host__ void SMonolithGPU::CopyToGPU_SplineMonolith(
short int spline_size,
unsigned int total_nknots,
unsigned int n_tf1) {
// ******************************************************
if (n_params != _N_SPLINES_) {
printf("Number of splines not equal to %i, GPU code for event-by-event splines will fail\n", _N_SPLINES_);
printf("n_params = %i\n", n_params);
Expand Down Expand Up @@ -351,7 +350,6 @@ __global__ void EvalOnGPU_Splines(
float* __restrict__ gpu_weights,
const cudaTextureObject_t __restrict__ text_coeff_x) {
//*********************************************************

// points per spline is the offset to skip in the index to move between splines
const unsigned int splineNum = (blockIdx.x * blockDim.x + threadIdx.x);

Expand All @@ -375,9 +373,9 @@ __global__ void EvalOnGPU_Splines(
// We've read the segment straight from CPU and is saved in segment_gpu
// polynomial parameters from the monolithic splineMonolith
const float fY = gpu_coeff_many[CurrentKnotPos];
const float fB = gpu_coeff_many[CurrentKnotPos+1];
const float fC = gpu_coeff_many[CurrentKnotPos+2];
const float fD = gpu_coeff_many[CurrentKnotPos+3];
const float fB = gpu_coeff_many[CurrentKnotPos + 1];
const float fC = gpu_coeff_many[CurrentKnotPos + 2];
const float fD = gpu_coeff_many[CurrentKnotPos + 3];
// This is the variation itself (needed to evaluate variation - stored spline point = dx)
const float dx = val_gpu[Param] - tex1Dfetch<float>(text_coeff_x, segment_X);

Expand All @@ -399,7 +397,6 @@ __global__ void EvalOnGPU_TF1(
const short int* __restrict__ gpu_paramNo_arr_tf1,
float* __restrict__ gpu_weights_tf1) {
//*********************************************************

// points per spline is the offset to skip in the index to move between splines
const unsigned int tf1Num = (blockIdx.x * blockDim.x + threadIdx.x);

Expand All @@ -408,18 +405,14 @@ __global__ void EvalOnGPU_TF1(
const float x = val_gpu[gpu_paramNo_arr_tf1[tf1Num]];

// Read the coefficients
const float a = gpu_coeffs_tf1[tf1Num*_nTF1Coeff_];
const float b = gpu_coeffs_tf1[tf1Num*_nTF1Coeff_+1];
const unsigned int TF1_Index = tf1Num * _nTF1Coeff_;
const float a = gpu_coeffs_tf1[TF1_Index];
const float b = gpu_coeffs_tf1[TF1_Index+1];

gpu_weights_tf1[tf1Num] = fmaf(a, x, b);

// gpu_weights_tf1[tf1Num] = a*x + b;
//
//gpu_weights_tf1[tf1Num] = 1 + a*x + b*x*x + c*x*x*x + d*x*x*x*x + e*x*x*x*x*x;

//#ifdef DEBUG
//printf("TF1 = %i/%i, weight = %f \n", tf1Num, d_n_TF1, gpu_weights_tf1[tf1Num]);
//#endif
}
}

Expand All @@ -443,24 +436,14 @@ __global__ void EvalOnGPU_TotWeight(
{
shared_total_weights[threadIdx.x] = 1.f;

const unsigned int EventOffset = 2*EventNum;
const unsigned int EventOffset = 2 * EventNum;

for (unsigned int id = 0; id < tex1Dfetch<unsigned int>(text_nParamPerEvent, EventOffset); ++id)
{
for (unsigned int id = 0; id < tex1Dfetch<unsigned int>(text_nParamPerEvent, EventOffset); ++id) {
shared_total_weights[threadIdx.x] *= gpu_weights[tex1Dfetch<unsigned int>(text_nParamPerEvent, EventOffset+1) + id];
//#ifdef DEBUG
//printf("Event = %i, Spline_Num = %i, gpu_weights = %f \n",
// EventNum, tex1Dfetch<unsigned int>(text_nParamPerEvent, 2*EventNum+1) + id, gpu_weights[tex1Dfetch<unsigned int>(text_nParamPerEvent, 2*EventNum+1) + id]);
//#endif
}

for (unsigned int id = 0; id < tex1Dfetch<unsigned int>(text_nParamPerEvent_TF1, EventOffset); ++id)
{
for (unsigned int id = 0; id < tex1Dfetch<unsigned int>(text_nParamPerEvent_TF1, EventOffset); ++id) {
shared_total_weights[threadIdx.x] *= gpu_weights_tf1[tex1Dfetch<unsigned int>(text_nParamPerEvent_TF1, EventOffset+1) + id];
//#ifdef DEBUG
//printf("Event = %i, Spline_Num = %i, gpu_weights_tf1 = %f \n",
// EventNum, tex1Dfetch<unsigned int>(text_nParamPerEvent_TF1, 2*EventNum+1) + id, gpu_weights_tf1[tex1Dfetch<unsigned int>(text_nParamPerEvent_TF1, 2*EventNum+1) + id]);
//#endif
}
gpu_total_weights[EventNum] = shared_total_weights[threadIdx.x];
}
Expand Down

0 comments on commit 9c24fe7

Please sign in to comment.