Skip to content

Commit

Permalink
make Statistical utils an actual library; in addition move plenty of stu…
Browse files Browse the repository at this point in the history
…ff from sample summary to utils
  • Loading branch information
KSkwarczynski committed Jan 15, 2025
1 parent c387927 commit 9c2e10c
Show file tree
Hide file tree
Showing 10 changed files with 853 additions and 773 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ set(CMAKE_VERBOSE_MAKEFILE ON)
# CMake version check
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

project(MaCh3 VERSION 1.3.3 LANGUAGES CXX)
project(MaCh3 VERSION 1.3.4 LANGUAGES CXX)
set(MaCh3_VERSION ${PROJECT_VERSION})

#LP - This option name is confusing, but I wont change it now.
Expand Down
2 changes: 1 addition & 1 deletion Doc/Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ PROJECT_NAME = "MaCh3"
# could be handy for archiving the generated documentation or if some version
# control system is used.

PROJECT_NUMBER = 1.3.3
PROJECT_NUMBER = 1.3.4

# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
Expand Down
18 changes: 11 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,19 +181,23 @@ The MaCh3 core plotting library code can be found [here](https://github.com/mach
## How To Use
This is an example of how your executable could look when using MaCh3:
```cpp
manager *fitMan = nullptr; //Manager is responsible for reading from config
//Manager is responsible for reading from config
std::unique_ptr<manager> fitMan = MaCh3ManagerFactory(argc, argv);

std::vector<samplePDFBase*> sample; //vector storing information about sample for different detector
std::vector<covarianceBase*> Cov; // vector with systematic implementation
mcmc *markovChain = nullptr; // MCMC class, can be replaced with other fitting method
MakeMaCh3Instance(fitMan, sample, Cov, markovChain); //Factory like function which initialises everything
MakeMaCh3Instance(fitMan.get(), sample, Cov); //Factory like function which initialises everything

// FitterBase class, can be replaced with other fitting method
std::unique_ptr<FitterBase> MarkovChain = MaCh3FitterFactory(fitMan.get());

//Adding samples and covariances to the Fitter class could be in the factory
for(unsigned int i = 0; i < sample.size(); i++)
markovChain->addSamplePDF(sample[i]);
MarkovChain->addSamplePDF(sample[i]);
for(unsigned int i = 0; i < Cov.size(); i++)
markovChain->addSystObj(Cov[i]);
MarkovChain->addSystObj(Cov[i]);

markovChain->RunLLHScan(); // can run LLH scan
markovChain->runMCMC(); //or run actual fit
MarkovChain->RunLLHScan(); // can run LLH scan
MarkovChain->runMCMC(); //or run actual fit
```
For more see [here](https://github.com/mach3-software/MaCh3Tutorial/blob/main/Tutorial/MCMCTutorial.cpp)
1 change: 1 addition & 0 deletions mcmc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ add_library(MCMC SHARED
MCMCProcessor.cpp
SampleSummary.cpp
MaCh3Factory.cpp
StatisticalUtils.cpp
$<$<NOT:$<BOOL:${CPU_ONLY}>>:gpuMCMCProcessorUtils.cu>
)

Expand Down
233 changes: 8 additions & 225 deletions mcmc/SampleSummary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,6 @@ SampleSummary::SampleSummary(const int n_Samples, const std::string &Filename, s
}//end loop over samples

DoByModePlots = false;
MeanHist_ByMode = nullptr;
PosteriorHist_ByMode = nullptr;

nModelParams = 0;
Expand Down Expand Up @@ -184,10 +183,8 @@ SampleSummary::~SampleSummary() {
if(MeanHist_ByMode[i][j] != nullptr) delete MeanHist_ByMode[i][j];
}
delete[] PosteriorHist_ByMode[i];
delete[] MeanHist_ByMode[i];
}
delete[] PosteriorHist_ByMode;
delete[] MeanHist_ByMode;
}

for (int i = 0; i < nSamples; ++i)
Expand Down Expand Up @@ -217,7 +214,7 @@ SampleSummary::~SampleSummary() {

// *******************
// Check size of sample against size of vectors
bool SampleSummary::CheckSamples(int Length) {
bool SampleSummary::CheckSamples(const int Length) {
// *******************
bool ok = (nSamples == Length);
if (!ok) {
Expand Down Expand Up @@ -266,7 +263,6 @@ void SampleSummary::AddData(std::vector<TH2Poly*> &Data) {
// Add the nominal histograms to the list (will have N_samples of these)
void SampleSummary::AddNominal(std::vector<TH2Poly*> &Nominal, std::vector<TH2Poly*> &NomW2) {
// *******************

const int Length = int(Nominal.size());
if (!CheckSamples(Length)) throw MaCh3Exception(__FILE__ , __LINE__ );

Expand Down Expand Up @@ -411,7 +407,6 @@ void SampleSummary::AddNominal(std::vector<TH2Poly*> &Nominal, std::vector<TH2Po
// The input here is nSamples long
void SampleSummary::AddThrow(std::vector<TH2Poly*> &SampleVector, std::vector<TH2Poly*> &W2Vec, const double LLHPenalty, const double Weight, const int DrawNumber) {
// *******************

nThrows++;
//KS: Only make sense for PosteriorPredictive
if( !isPriorPredictive )RandomHist->Fill(DrawNumber);
Expand Down Expand Up @@ -490,22 +485,20 @@ void SampleSummary::AddThrow(std::vector<TH2Poly*> &SampleVector, std::vector<TH
// The input here is has dimension [nsample][nMaCh3Modes]
void SampleSummary::AddThrowByMode(std::vector<std::vector<TH2Poly*>> &SampleVector_ByMode) {
// *******************

MCVectorByMode.push_back(SampleVector_ByMode);

//KS: This means this is first time
if(!DoByModePlots)
{
MACH3LOG_INFO("Turning reaction breadkwon mode, brum brum");
PosteriorHist_ByMode = new TH1D***[nSamples];
MeanHist_ByMode = new TH2Poly**[nSamples];

MeanHist_ByMode.resize(nSamples);
for (int SampleNum = 0; SampleNum < nSamples; SampleNum++)
{
if (DataHist[SampleNum] == nullptr) continue;

PosteriorHist_ByMode[SampleNum] = new TH1D**[Modes->GetNModes()+1];
MeanHist_ByMode[SampleNum] = new TH2Poly*[Modes->GetNModes()+1];
MeanHist_ByMode[SampleNum].resize(Modes->GetNModes()+1);
for (int j = 0; j < Modes->GetNModes()+1; j++)
{
PosteriorHist_ByMode[SampleNum][j] = new TH1D*[maxBins[SampleNum]+1];
Expand Down Expand Up @@ -981,7 +974,7 @@ void SampleSummary::Write() {
}

// *******************
// Make the posterior predictive distributions: fit Poissons etc
// Make the posterior predictive distributions: fit Poisson etc
void SampleSummary::MakePredictive() {
// *******************
// First make the projection on the z axis of the TH3D* for every pmu cosmu bin
Expand Down Expand Up @@ -1617,46 +1610,6 @@ void SampleSummary::MakeCutEventRate(TH1D *Histogram, const double DataRate) {
delete Fitter;
}

// ****************
// Make a ratio histogram: returns a clone of NumHist divided bin-by-bin by DenomHist
template<class HistType>
HistType* SampleSummary::RatioHists(HistType *NumHist, HistType *DenomHist) {
// ****************
  // Work on a clone so the numerator histogram is left untouched
  HistType *Ratio = static_cast<HistType*>(NumHist->Clone());

  const std::string RatioName = std::string(DenomHist->GetName()) + "_ratio";
  Ratio->SetNameTitle(RatioName.c_str(), RatioName.c_str());

  // Bin-by-bin division provided by ROOT
  Ratio->Divide(DenomHist);

  return Ratio;
}

// ****************
// Make a ratio th2poly: returns a clone of NumHist divided bin-by-bin by DenomHist
TH2Poly* SampleSummary::RatioPolys(TH2Poly *NumHist, TH2Poly *DenomHist) {
// ****************
  // TH2Poly lacks a working Divide, so do the bin-by-bin ratio by hand on a clone
  TH2Poly *Ratio = static_cast<TH2Poly*>(NumHist->Clone());

  const std::string RatioName = std::string(DenomHist->GetName()) + "_ratio";
  Ratio->SetNameTitle(RatioName.c_str(), RatioName.c_str());

  // TH2Poly bin indices are 1-based
  const int NBins = Ratio->GetNumberOfBins();
  for (int iBin = 1; iBin <= NBins; ++iBin) {
    Ratio->SetBinContent(iBin, NumHist->GetBinContent(iBin) / DenomHist->GetBinContent(iBin));
  }

  return Ratio;
}

// ****************
// Normalise a TH2Poly so its in-range content sums to unity
void SampleSummary::NormaliseTH2Poly(TH2Poly* Histogram){
// ****************
  // Integral excluding over/underflow, computed by the project helper
  const double Total = NoOverflowIntegral(Histogram);

  // TH2Poly bin indices are 1-based
  const int NBins = Histogram->GetNumberOfBins();
  for (int iBin = 1; iBin <= NBins; ++iBin) {
    Histogram->SetBinContent(iBin, Histogram->GetBinContent(iBin) / Total);
  }
}

// ****************
// Calculate the LLH for TH1D, set the LLH to title of MCHist
void SampleSummary::CalcLLH(TH1D * const & DatHist, TH1D * const & MCHist, TH1D * const & W2Hist) {
Expand Down Expand Up @@ -2283,177 +2236,18 @@ TH1D* SampleSummary::ProjectPoly(TH2Poly* Histogram, const bool ProjectX, const
//KS: We have two methods how to apply statistical fluctuation standard is faster hence is default
void SampleSummary::MakeFluctuatedHistogram(TH1D *FluctHist, TH1D* PolyHist){
// ****************
if(StandardFluctuation) MakeFluctuatedHistogramStandard(FluctHist, PolyHist);
else MakeFluctuatedHistogramAlternative(FluctHist, PolyHist);
if(StandardFluctuation) MakeFluctuatedHistogramStandard(FluctHist, PolyHist, rnd.get());
else MakeFluctuatedHistogramAlternative(FluctHist, PolyHist, rnd.get());
}

// ****************
//KS: We have two methods how to apply statistical fluctuation standard is faster hence is default
void SampleSummary::MakeFluctuatedHistogram(TH2Poly *FluctHist, TH2Poly* PolyHist){
// ****************
if(StandardFluctuation) MakeFluctuatedHistogramStandard(FluctHist, PolyHist);
else MakeFluctuatedHistogramAlternative(FluctHist, PolyHist);
if(StandardFluctuation) MakeFluctuatedHistogramStandard(FluctHist, PolyHist, rnd.get());
else MakeFluctuatedHistogramAlternative(FluctHist, PolyHist, rnd.get());
}

// ****************
// Make Poisson Fluctuation of TH1D hist: each bin is drawn from a Poisson
// whose mean is the corresponding bin of the input histogram
void SampleSummary::MakeFluctuatedHistogramStandard(TH1D *FluctHist, TH1D* PolyHist){
// ****************
  // Start from an empty histogram (the dummy Fill keeps ROOT entry bookkeeping sane)
  FluctHist->Reset("");
  FluctHist->Fill(0.0, 0.0);

  const int NBins = PolyHist->GetXaxis()->GetNbins();
  for (int iBin = 1; iBin <= NBins; ++iBin) {
    // Poisson-fluctuate the posterior predictive bin content
    FluctHist->SetBinContent(iBin, rnd->PoissonD(PolyHist->GetBinContent(iBin)));
  }
}

// ****************
// Make Poisson Fluctuation of TH2Poly hist: each bin is drawn from a Poisson
// whose mean is the corresponding bin of the input histogram
void SampleSummary::MakeFluctuatedHistogramStandard(TH2Poly *FluctHist, TH2Poly* PolyHist){
// ****************
  // Start from an empty histogram (the dummy Fill keeps ROOT entry bookkeeping sane)
  FluctHist->Reset("");
  FluctHist->Fill(0.0, 0.0, 0.0);

  // TH2Poly bin indices are 1-based
  const int NBins = FluctHist->GetNumberOfBins();
  for (int iBin = 1; iBin <= NBins; ++iBin) {
    // Poisson-fluctuate the posterior predictive bin content
    FluctHist->SetBinContent(iBin, rnd->PoissonD(PolyHist->GetBinContent(iBin)));
  }
}

// ****************
// Make Poisson Fluctuation of TH1D hist: fluctuate the total event rate,
// then scatter that many events according to the input histogram's shape
void SampleSummary::MakeFluctuatedHistogramAlternative(TH1D* FluctHist, TH1D* PolyHist){
// ****************
  // Start from an empty histogram (the dummy Fill keeps ROOT entry bookkeeping sane)
  FluctHist->Reset("");
  FluctHist->Fill(0.0, 0.0);

  // Total number of events is a Poisson draw on the integral
  const double evrate = PolyHist->Integral();
  const int num = rnd->PoissonD(evrate);

  // Each event lands in a bin sampled from the histogram treated as a PDF
  for (int iEvent = 0; iEvent < num; ++iEvent) {
    FluctHist->Fill(PolyHist->GetRandom());
  }
}

// ****************
// Make Poisson fluctuation of TH2Poly hist: fluctuate the total event rate,
// then scatter that many events according to the input histogram's shape
void SampleSummary::MakeFluctuatedHistogramAlternative(TH2Poly *FluctHist, TH2Poly* PolyHist){
// ****************
  // Start from an empty histogram (the dummy Fill keeps ROOT entry bookkeeping sane)
  FluctHist->Reset("");
  FluctHist->Fill(0.0, 0.0, 0.0);

  // Total number of events is a Poisson draw on the no-overflow integral
  const double evrate = NoOverflowIntegral(PolyHist);
  const int num = rnd->PoissonD(evrate);

  // Each event increments a bin sampled from the histogram treated as a PDF;
  // SetBinContent is used because TH2Poly's Fill would need (x, y) coordinates
  for (int iEvent = 0; iEvent < num; ++iEvent) {
    const int iBin = GetRandomPoly2(PolyHist);
    FluctHist->SetBinContent(iBin, FluctHist->GetBinContent(iBin) + 1);
  }
}

// ****************
//KS: ROOT developers were too lazy do develop getRanom2 for TH2Poly, this implementation is based on:
// https://root.cern.ch/doc/master/classTH2.html#a883f419e1f6899f9c4255b458d2afe2e
// Samples a random bin index (1-based, usable with SetBinContent) from the
// histogram treated as a probability distribution over its bins.
int SampleSummary::GetRandomPoly2(const TH2Poly* PolyHist){
// ****************
  const int nbins = PolyHist->GetNumberOfBins();
  const double r1 = rnd->Rndm();

  // Cumulative distribution over the bins. RAII std::vector instead of the
  // previous raw new[]/delete[] pair, so the buffer cannot leak.
  std::vector<double> fIntegral(nbins+2, 0.0);
  fIntegral[0] = 0.0;

  //KS: This is custom version of ComputeIntegral, once again ROOT was lazy :(
  for (int i = 1; i < nbins+1; ++i)
  {
    fIntegral[i] = fIntegral[i - 1] + PolyHist->GetBinContent(i);
  }
  // Normalise so the CDF ends at 1
  for (Int_t bin = 1; bin < nbins+1; ++bin) fIntegral[bin] /= fIntegral[nbins];
  fIntegral[nbins+1] = PolyHist->GetEntries();

  //KS: We just return one rather then X and Y, this way we can use SetBinContent rather than Fill, which is faster
  int iBin = int(TMath::BinarySearch(nbins, fIntegral.data(), r1));
  //KS: Have to increment because TH2Poly has stupid offset arghh
  iBin += 1;

  return iBin;
}

// ****************
// Get the mode error from a TH1D.
// Expands an interval outwards from the highest-density bin (one bin on each
// side per iteration) until it encloses 68.27% (1 sigma) of the integral,
// then returns the distance from the mode to the interval edge on the side
// whose final bin content was larger.
double SampleSummary::GetModeError(TH1D* hpost){
// ****************
  // Get the bin which has the largest posterior density
  int MaxBin = hpost->GetMaximumBin();

  // The total integral of the posterior
  const double Integral = hpost->Integral();

  // Interval edges, both starting at the mode
  int LowBin = MaxBin;
  int HighBin = MaxBin;
  // Running content of the enclosed interval
  double sum = hpost->GetBinContent(MaxBin);;
  double LowCon = 0.0;
  double HighCon = 0.0;
  // 0.6827 is the 1-sigma coverage fraction
  while (sum/Integral < 0.6827 && (LowBin > 0 || HighBin < hpost->GetNbinsX()+1) )
  {
    LowCon = 0.0;
    HighCon = 0.0;
    //KS:: Move further only if you haven't reached histogram end
    if(LowBin > 1)
    {
      LowBin--;
      LowCon = hpost->GetBinContent(LowBin);
    }
    if(HighBin < hpost->GetNbinsX())
    {
      HighBin++;
      HighCon = hpost->GetBinContent(HighBin);
    }

    // If we're on the last slice and the lower contour is larger than the upper
    if ((sum+LowCon+HighCon)/Integral > 0.6827 && LowCon > HighCon) {
      sum += LowCon;
      break;
    // If we're on the last slice and the upper contour is larger than the lower
    } else if ((sum+LowCon+HighCon)/Integral > 0.6827 && HighCon >= LowCon) {
      sum += HighCon;
      break;
    } else {
      sum += LowCon + HighCon;
    }
  }

  // Half-width taken on the side whose last accepted bin was larger
  double Mode_Error = 0.0;
  if (LowCon > HighCon) {
    Mode_Error = std::fabs(hpost->GetBinCenter(LowBin)-hpost->GetBinCenter(MaxBin));
  } else {
    Mode_Error = std::fabs(hpost->GetBinCenter(HighBin)-hpost->GetBinCenter(MaxBin));
  }

  return Mode_Error;
}

// ****************
void SampleSummary::StudyInformationCriterion(M3::kInfCrit Criterion) {
Expand Down Expand Up @@ -2591,14 +2385,3 @@ void SampleSummary::StudyWAIC() {
MACH3LOG_INFO("Effective number of parameters following WAIC formalism is equal to: {:.2f}", p_WAIC);
MACH3LOG_INFO("WAIC = {:.2f}", WAIC);
}

// ****************
//Fast and thread safe fill of violin histogram, it assumes both histograms have the same binning
// NOTE(review): the thread-safety claim holds only if concurrent callers touch
// disjoint x bins of the violin — confirm against call sites
void SampleSummary::FastViolinFill(TH2D* violin, TH1D* hist_1d){
// ****************
  const int NBinsX = violin->GetXaxis()->GetNbins();
  for (int x = 0; x < NBinsX; ++x)
  {
    // FindBin returns an Int_t bin index; keep it as int (it was previously
    // stored in a double, forcing pointless int->double->int conversions)
    const int y = violin->GetYaxis()->FindBin(hist_1d->GetBinContent(x+1));
    // Increment the (x, y) cell by one entry without the overhead of Fill
    violin->SetBinContent(x+1, y, violin->GetBinContent(x+1, y)+1);
  }
}
Loading

0 comments on commit 9c2e10c

Please sign in to comment.