diff --git a/CMakeLists.txt b/CMakeLists.txt index 149ce6c57..49ecf45ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ set(CMAKE_VERBOSE_MAKEFILE ON) # CMake version check cmake_minimum_required(VERSION 3.14 FATAL_ERROR) -project(MaCh3 VERSION 1.3.3 LANGUAGES CXX) +project(MaCh3 VERSION 1.3.4 LANGUAGES CXX) set(MaCh3_VERSION ${PROJECT_VERSION}) #LP - This option name is confusing, but I won't change it now. diff --git a/Doc/Doxyfile b/Doc/Doxyfile index 47577ccbf..4d41d8071 100644 --- a/Doc/Doxyfile +++ b/Doc/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "MaCh3" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 1.3.3 +PROJECT_NUMBER = 1.3.4 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/README.md b/README.md index fc0606bf5..e5204d848 100644 --- a/README.md +++ b/README.md @@ -181,19 +181,23 @@ The MaCh3 core plotting library code can be found [here](https://github.com/mach ## How To Use This is an example of what your executable could look like using MaCh3: ```cpp - manager *fitMan = nullptr; //Manager is responsible for reading from config + //Manager is responsible for reading from config + std::unique_ptr<manager> fitMan = MaCh3ManagerFactory(argc, argv); std::vector sample; //vector storing information about sample for different detector std::vector Cov; // vector with systematic implementation - mcmc *markovChain = nullptr; // MCMC class, can be replaced with other fitting method - MakeMaCh3Instance(fitMan, sample, Cov, markovChain); //Factory like function which initialises everything + MakeMaCh3Instance(fitMan.get(), sample, Cov); //Factory like function which initialises everything + + // FitterBase class, can be replaced with other fitting method + std::unique_ptr<FitterBase> MarkovChain = MaCh3FitterFactory(fitMan.get()); //Adding samples and covariances to the Fitter class could be in the factory for(unsigned int i = 0; i < sample.size(); i++) - markovChain->addSamplePDF(sample[i]); + MarkovChain->addSamplePDF(sample[i]); for(unsigned int i = 0; i < Cov.size(); i++) - markovChain->addSystObj(Cov[i]); + MarkovChain->addSystObj(Cov[i]); - markovChain->RunLLHScan(); // can run LLH scan - markovChain->runMCMC(); //or run actual fit + MarkovChain->RunLLHScan(); // can run LLH scan + MarkovChain->runMCMC(); //or run actual fit ``` +For more see [here](https://github.com/mach3-software/MaCh3Tutorial/blob/main/Tutorial/MCMCTutorial.cpp) diff --git a/mcmc/CMakeLists.txt b/mcmc/CMakeLists.txt index 997e74a8c..436f4b26f 100644 --- a/mcmc/CMakeLists.txt +++ b/mcmc/CMakeLists.txt @@ -20,6 +20,7 @@ add_library(MCMC SHARED MCMCProcessor.cpp SampleSummary.cpp MaCh3Factory.cpp + StatisticalUtils.cpp $<$>:gpuMCMCProcessorUtils.cu> ) diff --git a/mcmc/SampleSummary.cpp b/mcmc/SampleSummary.cpp index 686281ed3..d9b3cf096 100644 --- a/mcmc/SampleSummary.cpp +++ b/mcmc/SampleSummary.cpp @@ -151,7 +151,6 @@ SampleSummary::SampleSummary(const int n_Samples, const std::string &Filename, s }//end loop over samples DoByModePlots = false; - MeanHist_ByMode = nullptr; PosteriorHist_ByMode = nullptr; nModelParams = 0; @@ -184,10 +183,8 @@ SampleSummary::~SampleSummary() { if(MeanHist_ByMode[i][j] != nullptr) delete MeanHist_ByMode[i][j]; } delete[] PosteriorHist_ByMode[i]; - delete[] MeanHist_ByMode[i]; } delete[] PosteriorHist_ByMode; - delete[] MeanHist_ByMode; } for (int i = 0; i < nSamples; ++i) @@ -217,7 +214,7 @@
SampleSummary::~SampleSummary() { // ******************* // Check size of sample against size of vectors -bool SampleSummary::CheckSamples(int Length) { +bool SampleSummary::CheckSamples(const int Length) { // ******************* bool ok = (nSamples == Length); if (!ok) { @@ -266,7 +263,6 @@ void SampleSummary::AddData(std::vector &Data) { // Add the nominal histograms to the list (will have N_samples of these) void SampleSummary::AddNominal(std::vector &Nominal, std::vector &NomW2) { // ******************* - const int Length = int(Nominal.size()); if (!CheckSamples(Length)) throw MaCh3Exception(__FILE__ , __LINE__ ); @@ -411,7 +407,6 @@ void SampleSummary::AddNominal(std::vector &Nominal, std::vector &SampleVector, std::vector &W2Vec, const double LLHPenalty, const double Weight, const int DrawNumber) { // ******************* - nThrows++; //KS: Only make sense for PosteriorPredictive if( !isPriorPredictive )RandomHist->Fill(DrawNumber); @@ -490,7 +485,6 @@ void SampleSummary::AddThrow(std::vector &SampleVector, std::vector> &SampleVector_ByMode) { // ******************* - MCVectorByMode.push_back(SampleVector_ByMode); //KS: This means this is first time @@ -498,14 +492,13 @@ void SampleSummary::AddThrowByMode(std::vector> &SampleVec { MACH3LOG_INFO("Turning reaction breadkwon mode, brum brum"); PosteriorHist_ByMode = new TH1D***[nSamples]; - MeanHist_ByMode = new TH2Poly**[nSamples]; - + MeanHist_ByMode.resize(nSamples); for (int SampleNum = 0; SampleNum < nSamples; SampleNum++) { if (DataHist[SampleNum] == nullptr) continue; PosteriorHist_ByMode[SampleNum] = new TH1D**[Modes->GetNModes()+1]; - MeanHist_ByMode[SampleNum] = new TH2Poly*[Modes->GetNModes()+1]; + MeanHist_ByMode[SampleNum].resize(Modes->GetNModes()+1); for (int j = 0; j < Modes->GetNModes()+1; j++) { PosteriorHist_ByMode[SampleNum][j] = new TH1D*[maxBins[SampleNum]+1]; @@ -981,7 +974,7 @@ void SampleSummary::Write() { } // ******************* -// Make the posterior predictive distributions: fit Poissons etc +// Make the posterior predictive distributions: fit Poisson etc void SampleSummary::MakePredictive() { // ******************* // First make the projection on the z axis of the TH3D* for every pmu cosmu bin @@ -1617,46 +1610,6 @@ void SampleSummary::MakeCutEventRate(TH1D *Histogram, const double DataRate) { delete Fitter; } -// **************** -// Make a ratio histogram -template -HistType* SampleSummary::RatioHists(HistType *NumHist, HistType *DenomHist) { -// **************** - HistType *NumCopy = static_cast(NumHist->Clone()); - std::string title = std::string(DenomHist->GetName()) + "_ratio"; - NumCopy->SetNameTitle(title.c_str(), title.c_str()); - NumCopy->Divide(DenomHist); - - return NumCopy; -} - -// **************** -// Make a ratio th2poly -TH2Poly* SampleSummary::RatioPolys(TH2Poly *NumHist, TH2Poly *DenomHist) { -// **************** - TH2Poly *NumCopy = static_cast(NumHist->Clone()); - std::string title = std::string(DenomHist->GetName()) + "_ratio"; - NumCopy->SetNameTitle(title.c_str(), title.c_str()); - - for(int i = 1; i < NumCopy->GetNumberOfBins()+1; ++i) - { - NumCopy->SetBinContent(i,NumHist->GetBinContent(i)/DenomHist->GetBinContent(i)); - } - - return NumCopy; -} - -// **************** -// Normalise a TH2Poly -void SampleSummary::NormaliseTH2Poly(TH2Poly* Histogram){ -// **************** - const double Integral = NoOverflowIntegral(Histogram); - for(int j = 1; j < Histogram->GetNumberOfBins()+1; j++) - { - Histogram->SetBinContent(j, Histogram->GetBinContent(j)/Integral); - } -} - // 
**************** // Calculate the LLH for TH1D, set the LLH to title of MCHist void SampleSummary::CalcLLH(TH1D * const & DatHist, TH1D * const & MCHist, TH1D * const & W2Hist) { @@ -2283,177 +2236,18 @@ TH1D* SampleSummary::ProjectPoly(TH2Poly* Histogram, const bool ProjectX, const //KS: We have two methods how to apply statistical fluctuation standard is faster hence is default void SampleSummary::MakeFluctuatedHistogram(TH1D *FluctHist, TH1D* PolyHist){ // **************** - if(StandardFluctuation) MakeFluctuatedHistogramStandard(FluctHist, PolyHist); - else MakeFluctuatedHistogramAlternative(FluctHist, PolyHist); + if(StandardFluctuation) MakeFluctuatedHistogramStandard(FluctHist, PolyHist, rnd.get()); + else MakeFluctuatedHistogramAlternative(FluctHist, PolyHist, rnd.get()); } // **************** //KS: We have two methods how to apply statistical fluctuation standard is faster hence is default void SampleSummary::MakeFluctuatedHistogram(TH2Poly *FluctHist, TH2Poly* PolyHist){ // **************** - if(StandardFluctuation) MakeFluctuatedHistogramStandard(FluctHist, PolyHist); - else MakeFluctuatedHistogramAlternative(FluctHist, PolyHist); + if(StandardFluctuation) MakeFluctuatedHistogramStandard(FluctHist, PolyHist, rnd.get()); + else MakeFluctuatedHistogramAlternative(FluctHist, PolyHist, rnd.get()); } -// **************** -// Make Poisson Fluctuation of TH1D hist -void SampleSummary::MakeFluctuatedHistogramStandard(TH1D *FluctHist, TH1D* PolyHist){ -// **************** - // Make the Poisson fluctuated hist - FluctHist->Reset(""); - FluctHist->Fill(0.0, 0.0); - - for (int i = 1; i <= PolyHist->GetXaxis()->GetNbins(); ++i) - { - // Get the posterior predictive bin content - const double MeanContent = PolyHist->GetBinContent(i); - // Get a Poisson fluctuation of the content - const double Random = rnd->PoissonD(MeanContent); - // Set the fluctuated histogram content to the Poisson variation of the posterior predictive histogram - FluctHist->SetBinContent(i,Random); - } -} - -// **************** -// Make Poisson Fluctuation of TH2Poly hist -void SampleSummary::MakeFluctuatedHistogramStandard(TH2Poly *FluctHist, TH2Poly* PolyHist){ -// **************** - // Make the Poisson fluctuated hist - FluctHist->Reset(""); - FluctHist->Fill(0.0, 0.0, 0.0); - - for (int i = 1; i < FluctHist->GetNumberOfBins()+1; ++i) - { - // Get the posterior predictive bin content - const double MeanContent = PolyHist->GetBinContent(i); - // Get a Poisson fluctuation of the content - const double Random = rnd->PoissonD(MeanContent); - // Set the fluctuated histogram content to the Poisson variation of the posterior predictive histogram - FluctHist->SetBinContent(i,Random); - } -} - -// **************** -// Make Poisson Fluctuation of TH1D hist -void SampleSummary::MakeFluctuatedHistogramAlternative(TH1D* FluctHist, TH1D* PolyHist){ -// **************** - // Make the Poisson fluctuated hist - FluctHist->Reset(""); - FluctHist->Fill(0.0, 0.0); - - const double evrate = PolyHist->Integral(); - const int num = rnd->PoissonD(evrate); - int count = 0; - while(count < num) - { - const double candidate = PolyHist->GetRandom(); - FluctHist->Fill(candidate); - count++; - } -} - -// **************** -// Make Poisson fluctuation of TH2Poly hist -void SampleSummary::MakeFluctuatedHistogramAlternative(TH2Poly *FluctHist, TH2Poly* PolyHist){ -// **************** - // Make the Poisson fluctuated hist - FluctHist->Reset(""); - FluctHist->Fill(0.0, 0.0, 0.0); - - const double evrate = NoOverflowIntegral(PolyHist); - const int num = 
rnd->PoissonD(evrate); - int count = 0; - while(count < num) - { - const int iBin = GetRandomPoly2(PolyHist); - FluctHist->SetBinContent(iBin, FluctHist->GetBinContent(iBin) + 1); - count++; - } -} - -// **************** -//KS: ROOT developers were too lazy do develop getRanom2 for TH2Poly, this implementation is based on: -// https://root.cern.ch/doc/master/classTH2.html#a883f419e1f6899f9c4255b458d2afe2e -int SampleSummary::GetRandomPoly2(const TH2Poly* PolyHist){ -// **************** - const int nbins = PolyHist->GetNumberOfBins(); - const double r1 = rnd->Rndm(); - - double* fIntegral = new double[nbins+2]; - fIntegral[0] = 0.0; - - //KS: This is custom version of ComputeIntegral, once again ROOT was lazy :( - for (int i = 1; i < nbins+1; ++i) - { - fIntegral[i] = 0.0; - const double content = PolyHist->GetBinContent(i); - fIntegral[i] += fIntegral[i - 1] + content; - } - for (Int_t bin = 1; bin < nbins+1; ++bin) fIntegral[bin] /= fIntegral[nbins]; - fIntegral[nbins+1] = PolyHist->GetEntries(); - - //KS: We just return one rather then X and Y, this way we can use SetBinContent rather than Fill, which is faster - int iBin = int(TMath::BinarySearch(nbins, fIntegral, r1)); - //KS: Have to increment because TH2Poly has stupid offset arghh - iBin += 1; - - delete[] fIntegral; - return iBin; -} - -// **************** -// Get the mode error from a TH1D -double SampleSummary::GetModeError(TH1D* hpost){ -// **************** - // Get the bin which has the largest posterior density - int MaxBin = hpost->GetMaximumBin(); - - // The total integral of the posterior - const double Integral = hpost->Integral(); - - int LowBin = MaxBin; - int HighBin = MaxBin; - double sum = hpost->GetBinContent(MaxBin);; - double LowCon = 0.0; - double HighCon = 0.0; - while (sum/Integral < 0.6827 && (LowBin > 0 || HighBin < hpost->GetNbinsX()+1) ) - { - LowCon = 0.0; - HighCon = 0.0; - //KS:: Move further only if you haven't reached histogram end - if(LowBin > 1) - { - LowBin--; - LowCon = hpost->GetBinContent(LowBin); - } - if(HighBin < hpost->GetNbinsX()) - { - HighBin++; - HighCon = hpost->GetBinContent(HighBin); - } - - // If we're on the last slice and the lower contour is larger than the upper - if ((sum+LowCon+HighCon)/Integral > 0.6827 && LowCon > HighCon) { - sum += LowCon; - break; - // If we're on the last slice and the upper contour is larger than the lower - } else if ((sum+LowCon+HighCon)/Integral > 0.6827 && HighCon >= LowCon) { - sum += HighCon; - break; - } else { - sum += LowCon + HighCon; - } - } - - double Mode_Error = 0.0; - if (LowCon > HighCon) { - Mode_Error = std::fabs(hpost->GetBinCenter(LowBin)-hpost->GetBinCenter(MaxBin)); - } else { - Mode_Error = std::fabs(hpost->GetBinCenter(HighBin)-hpost->GetBinCenter(MaxBin)); - } - - return Mode_Error; -} // **************** void SampleSummary::StudyInformationCriterion(M3::kInfCrit Criterion) { @@ -2591,14 +2385,3 @@ void SampleSummary::StudyWAIC() { MACH3LOG_INFO("Effective number of parameters following WAIC formalism is equal to: {:.2f}", p_WAIC); MACH3LOG_INFO("WAIC = {:.2f}", WAIC); } - -// **************** -//Fast and thread safe fill of violin histogram, it assumes both histograms have the same binning -void SampleSummary::FastViolinFill(TH2D* violin, TH1D* hist_1d){ -// **************** - for (int x = 0; x < violin->GetXaxis()->GetNbins(); ++x) - { - const double y = violin->GetYaxis()->FindBin(hist_1d->GetBinContent(x+1)); - violin->SetBinContent(x+1, y, violin->GetBinContent(x+1, y)+1); - } -} diff --git a/mcmc/SampleSummary.h 
b/mcmc/SampleSummary.h index b8dc0acc9..b357e20f8 100644 --- a/mcmc/SampleSummary.h +++ b/mcmc/SampleSummary.h @@ -101,11 +101,6 @@ class SampleSummary { /// @brief Check the length of samples agrees inline bool CheckSamples(const int Length); - /// @brief Helper to make ratio histograms - template HistType* RatioHists(HistType* NumHist, HistType* DenomHist); - /// @brief Helper to make ratio of TH2Polys - inline TH2Poly* RatioPolys(TH2Poly* NumPoly, TH2Poly* DenomPoly); - /// @brief Helper to project TH2D onto axis inline TH1D* ProjectHist(TH2D* Histogram, const bool ProjectX); /// @brief Helper to project TH2Poly onto axis @@ -113,28 +108,9 @@ class SampleSummary { /// @brief Make Poisson fluctuation of TH1D hist inline void MakeFluctuatedHistogram(TH1D *FluctHist, TH1D* PolyHist); - /// @brief Make Poisson fluctuation of TH1D hist using default fast method - inline void MakeFluctuatedHistogramStandard(TH1D *FluctHist, TH1D* PolyHist); - /// @brief Make Poisson fluctuation of TH1D hist using slow method which is only for cross-check - inline void MakeFluctuatedHistogramAlternative(TH1D *FluctHist, TH1D* PolyHist); /// @brief Make Poisson fluctuation of TH2Poly hist inline void MakeFluctuatedHistogram(TH2Poly *FluctHist, TH2Poly* PolyHist); - /// @brief Make Poisson fluctuation of TH2Poly hist using default fast method - inline void MakeFluctuatedHistogramStandard(TH2Poly *FluctHist, TH2Poly* PolyHist); - /// @brief Make Poisson fluctuation of TH2Poly hist using slow method which is only for cross-check - inline void MakeFluctuatedHistogramAlternative(TH2Poly *FluctHist, TH2Poly* PolyHist); - - /// @brief KS: Fill Violin histogram with entry from a toy - /// @param violin hist that will be filled - /// @param hist_1d refence hist from which we take entries to be filled - inline void FastViolinFill(TH2D* violin, TH1D* hist_1d); - /// @brief KS: ROOT developers were too lazy do develop getRanom2 for TH2Poly, this implementation is based on [link](https://root.cern.ch/doc/master/classTH2.html#a883f419e1f6899f9c4255b458d2afe2e) - inline int GetRandomPoly2(const TH2Poly* PolyHist); - - /// @brief Get the mode error from a TH1D - /// @param hpost hist from which we extract mode error - inline double GetModeError(TH1D* hpost); /// @brief Information Criterion inline void StudyInformationCriterion(M3::kInfCrit Criterion); @@ -153,10 +129,6 @@ class SampleSummary { /// @cite Hartig2024WAIC inline void StudyWAIC(); - /// @brief Helper to Normalise histograms - /// @param Histogram hist which we normalise - inline void NormaliseTH2Poly(TH2Poly* Histogram); - /// Random number generator std::unique_ptr rnd; /// KS: Hacky flag to let us know if this is first toy @@ -358,7 +330,7 @@ class SampleSummary { /// By mode variables bool DoByModePlots; /// The posterior predictive distribution in pmu cosmu using the mean - TH2Poly ***MeanHist_ByMode; + std::vector> MeanHist_ByMode; /// Histogram which corresponds to each bin in the sample's th2poly TH1D ****PosteriorHist_ByMode; diff --git a/mcmc/StatisticalUtils.cpp b/mcmc/StatisticalUtils.cpp new file mode 100644 index 000000000..c6a48166c --- /dev/null +++ b/mcmc/StatisticalUtils.cpp @@ -0,0 +1,597 @@ +//MaCh3 includes +#include "mcmc/StatisticalUtils.h" + +// ************************** +std::string GetJeffreysScale(const double BayesFactor){ +// ************************** + std::string JeffreysScale = ""; + //KS: Get fancy Jeffreys Scale as I am to lazy to look into table every time + if(BayesFactor < 0) JeffreysScale = "Negative"; + else if( 5 > 
BayesFactor) JeffreysScale = "Barely worth mentioning"; + else if( 10 > BayesFactor) JeffreysScale = "Substantial"; + else if( 15 > BayesFactor) JeffreysScale = "Strong"; + else if( 20 > BayesFactor) JeffreysScale = "Very strong"; + else JeffreysScale = "Decisive"; + + return JeffreysScale; +} + +// ************************** +std::string GetDunneKaboth(const double BayesFactor){ +// ************************** + std::string DunneKaboth = ""; + //KS: Get fancy DunneKaboth Scale as I am to lazy to look into table every time + + if(2.125 > BayesFactor) DunneKaboth = "< 1 #sigma"; + else if( 20.74 > BayesFactor) DunneKaboth = "> 1 #sigma"; + else if( 369.4 > BayesFactor) DunneKaboth = "> 2 #sigma"; + else if( 15800 > BayesFactor) DunneKaboth = "> 3 #sigma"; + else if( 1745000 > BayesFactor) DunneKaboth = "> 4 #sigma"; + else DunneKaboth = "> 5 #sigma"; + + return DunneKaboth; +} + +// ********************* +double GetSigmaValue(const int sigma) { +// ********************* + double width = 0; + switch (std::abs(sigma)) + { + case 1: + width = 0.682689492137; + break; + case 2: + width = 0.954499736104; + break; + case 3: + width = 0.997300203937; + break; + case 4: + width = 0.999936657516; + break; + case 5: + width = 0.999999426697; + break; + case 6: + width = 0.999999998027; + break; + default: + MACH3LOG_ERROR("{} is unsupported value of sigma", sigma); + throw MaCh3Exception(__FILE__ , __LINE__ ); + break; + } + return width; +} + +// **************** +double GetBIC(const double llh, const int data, const int nPars){ +// **************** + if(nPars == 0) + { + MACH3LOG_ERROR("You haven't passed number of model parameters as it is still zero"); + throw MaCh3Exception(__FILE__ , __LINE__ ); + } + const double BIC = double(nPars * logl(data) + llh); + + return BIC; +} + +// **************** +double GetNeffective(const int N1, const int N2) { +// **************** + const double Nominator = (N1+N2); + const double Denominator = (N1*N2); + const double N_e = Nominator/Denominator; + return N_e; +} + +// **************** +void CheckBonferoniCorrectedpValue(const std::vector& SampleNameVec, + const std::vector& PValVec, + const double Threshold) { +// **************** + MACH3LOG_INFO(""); + if(SampleNameVec.size() != PValVec.size()) + { + MACH3LOG_ERROR("Size of vectors do not match"); + throw MaCh3Exception(__FILE__ , __LINE__ ); + } + const size_t NumberOfStatisticalTests = SampleNameVec.size(); + //KS: 0.05 or 5% is value used by T2K. 
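+  // For example, with Threshold = 0.05 and 10 samples, each p-value is compared
+  // against 0.05/10 = 0.005 on the low side and against 1 - 0.005 = 0.995 on the high side.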
+ const double StatisticalSignificanceDown = Threshold / double(NumberOfStatisticalTests); + const double StatisticalSignificanceUp = 1 - StatisticalSignificanceDown; + MACH3LOG_INFO("Bonferroni-corrected statistical significance level: {:.2f}", StatisticalSignificanceDown); + + int Counter = 0; + for(unsigned int i = 0; i < SampleNameVec.size(); i++) + { + if( (PValVec[i] < 0.5 && PValVec[i] < StatisticalSignificanceDown) ) { + MACH3LOG_INFO("Sample {} indicates disagreement between the model predictions and the data", SampleNameVec[i]); + MACH3LOG_INFO("Bonferroni-corrected statistical significance level: {:.2f} p-value: {:.2f}", StatisticalSignificanceDown, PValVec[i]); + Counter++; + } else if( (PValVec[i] > 0.5 && PValVec[i] > StatisticalSignificanceUp) ) { + MACH3LOG_INFO("Sample {} indicates disagreement between the model predictions and the data", SampleNameVec[i]); + MACH3LOG_INFO("Bonferroni-corrected statistical significance level: {:.2f} p-value: {:.2f}", StatisticalSignificanceUp, PValVec[i]); + Counter++; + } + } + if(Counter == 0) { + MACH3LOG_INFO("Every sample passed Bonferroni-corrected statistical significance level test"); + } else { + MACH3LOG_WARN("{} samples didn't pass Bonferroni-corrected statistical significance level test", Counter); + } + MACH3LOG_INFO(""); +} + +// **************** +double GetAndersonDarlingTestStat(const double CumulativeData, const double CumulativeMC, const double CumulativeJoint) { +// **************** + double ADstat = std::fabs(CumulativeData - CumulativeMC)/ std::sqrt(CumulativeJoint*(1 - CumulativeJoint)); + + if( std::isinf(ADstat) || std::isnan(ADstat)) return 0; + return ADstat; +} + +// **************** +int GetNumberOfRuns(const std::vector& GroupClasifier) { +// **************** + int NumberOfRuns = 0; + int PreviousGroup = -999; + + //KS: If group changed increment run + for (unsigned int i = 0; i < GroupClasifier.size(); i++) + { + if(GroupClasifier[i] != PreviousGroup) + NumberOfRuns++; + PreviousGroup = GroupClasifier[i]; + } + + return NumberOfRuns; +} + +// **************** +double GetBetaParameter(const double data, const double mc, const double w2, TestStatistic TestStat) { +// **************** + double Beta = 0.0; + + if (TestStat == kDembinskiAbdelmottele) { + //the so-called effective count + const double k = mc*mc / w2; + //Calculate beta which is scaling factor between true and generated MC + Beta = (data + k) / (mc + k); + } + //KS: Below is technically only true for Cowan's BB, which will not be true for Poisson or IceCube, because why not... 
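+  // i.e. beta is taken as the positive root of beta^2 + (mc*f^2 - 1)*beta - data*f^2 = 0,
+  // where f = sqrt(w2)/mc is the fractional MC uncertainty in the bin (the single-bin Barlow-Beeston condition referred to above).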
+ else { + // CW: Barlow-Beeston uses fractional uncertainty on MC, so sqrt(sum[w^2])/mc + const double fractional = std::sqrt(w2)/mc; + // CW: -b/2a in quadratic equation + const double temp = mc*fractional*fractional-1; + // CW: b^2 - 4ac in quadratic equation + const double temp2 = temp*temp + 4*data*fractional*fractional; + if (temp2 < 0) { + MACH3LOG_ERROR("Negative square root in Barlow Beeston coefficient calculation!"); + throw MaCh3Exception(__FILE__ , __LINE__ ); + } + // CW: Solve for the positive beta + Beta = (-1*temp+std::sqrt(temp2))/2.; + } + return Beta; +} + + +// ********************* +double GetSubOptimality(const std::vector& EigenValues, const int TotalTarameters) { +// ********************* + double sum_eigenvalues_squared_inv = 0.0; + double sum_eigenvalues_inv = 0.0; + for (unsigned int j = 0; j < EigenValues.size(); j++) + { + //KS: IF Eigen values are super small skip them + //if(EigenValues[j] < 0.0000001) continue; + sum_eigenvalues_squared_inv += std::pow(EigenValues[j], -2); + sum_eigenvalues_inv += 1.0 / EigenValues[j]; + } + const double SubOptimality = TotalTarameters * sum_eigenvalues_squared_inv / std::pow(sum_eigenvalues_inv, 2); + return SubOptimality; +} + + +// ************************** +void GetArithmetic(TH1D * const hist, double& Mean, double& Error) { +// ************************** + Mean = hist->GetMean(); + Error = hist->GetRMS(); +} + +// ************************** +void GetGaussian(TH1D*& hist, TF1* gauss, double& Mean, double& Error) { +// ************************** + const double meanval = hist->GetMean(); + const double err = hist->GetRMS(); + const double peakval = hist->GetBinCenter(hist->GetMaximumBin()); + + // Set the range for the Gaussian fit + gauss->SetRange(meanval - 1.5*err , meanval + 1.5*err); + // Set the starting parameters close to RMS and peaks of the histograms + gauss->SetParameters(hist->GetMaximum()*err*std::sqrt(2*3.14), peakval, err); + + // Perform the fit + hist->Fit(gauss->GetName(),"Rq"); + hist->SetStats(0); + + Mean = gauss->GetParameter(1); + Error = gauss->GetParameter(2); +} + +// *************** +void GetHPD(TH1D* const hist, double& Mean, double& Error, double& Error_p, double& Error_m, const double coverage) { +// **************** + // Get the bin which has the largest posterior density + const int MaxBin = hist->GetMaximumBin(); + // And it's value + const double peakval = hist->GetBinCenter(MaxBin); + + // The total integral of the posterior + const long double Integral = hist->Integral(); + //KS: and integral of left handed and right handed parts + const long double LowIntegral = hist->Integral(1, MaxBin-1) + hist->GetBinContent(MaxBin)/2.0; + const long double HighIntegral = hist->Integral(MaxBin+1, hist->GetNbinsX()) + hist->GetBinContent(MaxBin)/2.0; + + // Keep count of how much area we're covering + //KS: Take only half content of HPD bin as one half goes for right handed error and the other for left handed error + long double sum = hist->GetBinContent(MaxBin)/2.0; + + // Counter for current bin + int CurrBin = MaxBin; + while (sum/HighIntegral < coverage && CurrBin < hist->GetNbinsX()) { + CurrBin++; + sum += hist->GetBinContent(CurrBin); + } + const double sigma_p = std::fabs(hist->GetBinCenter(MaxBin)-hist->GetXaxis()->GetBinUpEdge(CurrBin)); + // Reset the sum + //KS: Take only half content of HPD bin as one half goes for right handed error and the other for left handed error + sum = hist->GetBinContent(MaxBin)/2.0; + + // Reset the bin counter + CurrBin = MaxBin; + // Counter for current bin 
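+  // Walk down from the peak bin until the lower-side integral reaches the requested coverage;
+  // together with the upward scan above this gives the asymmetric errors sigma_p and sigma_m.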
+ while (sum/LowIntegral < coverage && CurrBin > 1) { + CurrBin--; + sum += hist->GetBinContent(CurrBin); + } + const double sigma_m = std::fabs(hist->GetBinCenter(CurrBin)-hist->GetBinLowEdge(MaxBin)); + + // Now do the double sided HPD + //KS: Start sum from the HPD + sum = hist->GetBinContent(MaxBin); + int LowBin = MaxBin; + int HighBin = MaxBin; + long double LowCon = 0.0; + long double HighCon = 0.0; + + while (sum/Integral < coverage && (LowBin > 0 || HighBin < hist->GetNbinsX()+1)) + { + LowCon = 0.0; + HighCon = 0.0; + //KS:: Move further only if you haven't reached histogram end + if(LowBin > 1) + { + LowBin--; + LowCon = hist->GetBinContent(LowBin); + } + if(HighBin < hist->GetNbinsX()) + { + HighBin++; + HighCon = hist->GetBinContent(HighBin); + } + + // If we're on the last slice and the lower contour is larger than the upper + if ((sum+LowCon+HighCon)/Integral > coverage && LowCon > HighCon) { + sum += LowCon; + break; + // If we're on the last slice and the upper contour is larger than the lower + } else if ((sum+LowCon+HighCon)/Integral > coverage && HighCon >= LowCon) { + sum += HighCon; + break; + } else { + sum += LowCon + HighCon; + } + } + + double sigma_hpd = 0.0; + if (LowCon > HighCon) { + sigma_hpd = std::fabs(hist->GetBinLowEdge(LowBin)-hist->GetBinCenter(MaxBin)); + } else { + sigma_hpd = std::fabs(hist->GetXaxis()->GetBinUpEdge(HighBin)-hist->GetBinCenter(MaxBin)); + } + + Mean = peakval; + Error = sigma_hpd; + Error_p = sigma_p; + Error_m = sigma_m; +} + +// *************** +void GetCredibleInterval(TH1D* const hist, TH1D* hpost_copy, const double coverage) { +// *************** + if(coverage > 1) + { + MACH3LOG_ERROR("Specified Credible Interval is greater that 1 and equal to {} Should be between 0 and 1", coverage); + throw MaCh3Exception(__FILE__ , __LINE__ ); + } + //KS: Reset first copy of histogram + hpost_copy->Reset(""); + hpost_copy->Fill(0.0, 0.0); + + //KS: Temporary structure to be thread save + std::vector hist_copy(hist->GetXaxis()->GetNbins()+1); + std::vector hist_copy_fill(hist->GetXaxis()->GetNbins()+1); + for (int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i) + { + hist_copy[i] = hist->GetBinContent(i); + hist_copy_fill[i] = false; + } + + /// Loop over histogram bins with highest number of entries until covered 90 or 68.3% + const long double Integral = hist->Integral(); + long double sum = 0; + + while ((sum / Integral) < coverage) + { + /// Get bin of highest content and save the number of entries reached so far + int max_entry_bin = 0; + double max_entries = 0.; + for (int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i) + { + if (hist_copy[i] > max_entries) + { + max_entries = hist_copy[i]; + max_entry_bin = i; + } + } + /// Replace bin value by -1 so it is not looped over as being maximum bin again + hist_copy[max_entry_bin] = -1.; + hist_copy_fill[max_entry_bin] = true; + + sum += max_entries; + } + //KS: Now fill our copy only for bins which got included in coverage region + for(int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i) + { + if(hist_copy_fill[i]) hpost_copy->SetBinContent(i, hist->GetBinContent(i)); + } +} + +// *************** +void GetCredibleIntervalSig(TH1D* const hist, TH1D* hpost_copy, const bool CredibleInSigmas, const double coverage) { +// *************** + //KS: Slightly different approach depending if intervals are in percentage or sigmas + if(CredibleInSigmas) { + //KS: Convert sigmas into percentage + const double CredReg = GetSigmaValue(int(std::round(coverage))); + GetCredibleInterval(hist, hpost_copy, 
CredReg); + } else { + GetCredibleInterval(hist, hpost_copy, coverage); + } +} + +// *************** +void GetCredibleRegion(TH2D* const hist2D, const double coverage) { +// *************** + if(coverage > 1) + { + MACH3LOG_ERROR("Specified Credible Region is greater than 1 and equal to {:.2f} Should be between 0 and 1 {}", coverage); + throw MaCh3Exception(__FILE__ , __LINE__ ); + } + + //KS: Temporary structure to be thread save + std::vector> hist_copy(hist2D->GetXaxis()->GetNbins()+1, + std::vector(hist2D->GetYaxis()->GetNbins()+1)); + for (int i = 0; i <= hist2D->GetXaxis()->GetNbins(); ++i) { + for (int j = 0; j <= hist2D->GetYaxis()->GetNbins(); ++j) { + hist_copy[i][j] = hist2D->GetBinContent(i, j); + } + } + + /// Loop over histogram bins with highest number of entries until covered 90 or 68.3% + const long double Integral = hist2D->Integral(); + long double sum = 0; + + //We need to as ROOT requires array to set to contour + double Contour[1]; + while ((sum / Integral) < coverage) + { + /// Get bin of highest content and save the number of entries reached so far + int max_entry_bin_x = 0; + int max_entry_bin_y = 0; + double max_entries = 0.; + for (int i = 0; i <= hist2D->GetXaxis()->GetNbins(); ++i) + { + for (int j = 0; j <= hist2D->GetYaxis()->GetNbins(); ++j) + { + if (hist_copy[i][j] > max_entries) + { + max_entries = hist_copy[i][j]; + max_entry_bin_x = i; + max_entry_bin_y = j; + } + } + } + /// Replace bin value by -1 so it is not looped over as being maximum bin again + hist_copy[max_entry_bin_x][max_entry_bin_y] = -1.; + + sum += max_entries; + Contour[0] = max_entries; + } + hist2D->SetContour(1, Contour); +} + +// *************** +void GetCredibleRegionSig(TH2D* const hist2D, const bool CredibleInSigmas, const double coverage) { +// *************** + if(CredibleInSigmas) { + //KS: Convert sigmas into percentage + const double CredReg = GetSigmaValue(int(std::round(coverage))); + GetCredibleRegion(hist2D, CredReg); + } else { + GetCredibleRegion(hist2D, coverage); + } +} + +// ********************* +double GetIQR(TH1D *Hist) { +// ********************* + if(Hist->Integral() == 0) return 0.0; + + constexpr double quartiles_x[3] = {0.25, 0.5, 0.75}; + double quartiles[3]; + + Hist->GetQuantiles(3, quartiles, quartiles_x); + + return quartiles[2] - quartiles[0]; +} + +// ******************** +double ComputeKLDivergence(TH2Poly* DataPoly, TH2Poly* PolyMC) { +// ********************* + double klDivergence = 0.0; + double DataIntegral = NoOverflowIntegral(DataPoly); + double MCIntegral = NoOverflowIntegral(PolyMC); + for (int i = 1; i < DataPoly->GetNumberOfBins()+1; ++i) + { + if (DataPoly->GetBinContent(i) > 0 && PolyMC->GetBinContent(i) > 0) { + klDivergence += DataPoly->GetBinContent(i) / DataIntegral * + std::log((DataPoly->GetBinContent(i) / DataIntegral) / ( PolyMC->GetBinContent(i) / MCIntegral)); + } + } + return klDivergence; +} +// ******************** +double FisherCombinedPValue(const std::vector& pvalues) { +// ******************** + double testStatistic = 0; + for(size_t i = 0; i < pvalues.size(); i++) + { + const double pval = std::max(0.00001, pvalues[i]); + testStatistic += -2.0 * std::log(pval); + } + // Degrees of freedom is twice the number of p-values + int degreesOfFreedom = int(2 * pvalues.size()); + double pValue = TMath::Prob(testStatistic, degreesOfFreedom); + + return pValue; +} + +// ******************** +void ThinningMCMC(const std::string& FilePath, const int ThinningCut) { +// ******************** + // Define the path for the temporary thinned 
file + std::string TempFilePath = "Thinned_" + FilePath; + int ret = system(("cp " + FilePath + " " + TempFilePath).c_str()); + if (ret != 0) { + MACH3LOG_WARN("Error: system call to copy file failed with code {}", ret); + } + + TFile *inFile = TFile::Open(TempFilePath.c_str(), "UPDATE"); + if (!inFile || inFile->IsZombie()) { + MACH3LOG_ERROR("Error opening file: {}", TempFilePath); + throw MaCh3Exception(__FILE__, __LINE__); + } + + TTree *inTree = inFile->Get("posteriors"); + if (!inTree) { + MACH3LOG_ERROR("Error: TTree 'posteriors' not found in file."); + inFile->ls(); + inFile->Close(); + throw MaCh3Exception(__FILE__, __LINE__); + } + + // Clone the structure without data + TTree *outTree = inTree->CloneTree(0); + + // Loop over entries and apply thinning + Long64_t nEntries = inTree->GetEntries(); + double retainedPercentage = (double(nEntries) / ThinningCut) / double(nEntries) * 100; + MACH3LOG_INFO("Thinning will retain {:.2f}% of chains", retainedPercentage); + for (Long64_t i = 0; i < nEntries; i++) { + if (i % (nEntries/10) == 0) { + MaCh3Utils::PrintProgressBar(i, nEntries); + } + if (i % ThinningCut == 0) { + inTree->GetEntry(i); + outTree->Fill(); + } + } + inFile->WriteTObject(outTree, "posteriors", "kOverwrite"); + inFile->Close(); + delete inFile; + + MACH3LOG_INFO("Thinned TTree saved and overwrote original in: {}", TempFilePath); +} + +// ******************** +double GetZScore(const double value, const double mean, const double stddev) { +// ******************** + return (value - mean) / stddev; +} + +// ******************** +double GetPValueFromZScore(const double zScore) { +// ******************** + return 0.5 * std::erfc(-zScore / std::sqrt(2)); +} + +// **************** +// Get the mode error from a TH1D +double GetModeError(TH1D* hpost) { +// **************** + // Get the bin which has the largest posterior density + int MaxBin = hpost->GetMaximumBin(); + + // The total integral of the posterior + const double Integral = hpost->Integral(); + + int LowBin = MaxBin; + int HighBin = MaxBin; + double sum = hpost->GetBinContent(MaxBin);; + double LowCon = 0.0; + double HighCon = 0.0; + while (sum/Integral < 0.6827 && (LowBin > 0 || HighBin < hpost->GetNbinsX()+1) ) + { + LowCon = 0.0; + HighCon = 0.0; + //KS:: Move further only if you haven't reached histogram end + if(LowBin > 1) + { + LowBin--; + LowCon = hpost->GetBinContent(LowBin); + } + if(HighBin < hpost->GetNbinsX()) + { + HighBin++; + HighCon = hpost->GetBinContent(HighBin); + } + + // If we're on the last slice and the lower contour is larger than the upper + if ((sum+LowCon+HighCon)/Integral > 0.6827 && LowCon > HighCon) { + sum += LowCon; + break; + // If we're on the last slice and the upper contour is larger than the lower + } else if ((sum+LowCon+HighCon)/Integral > 0.6827 && HighCon >= LowCon) { + sum += HighCon; + break; + } else { + sum += LowCon + HighCon; + } + } + + double Mode_Error = 0.0; + if (LowCon > HighCon) { + Mode_Error = std::fabs(hpost->GetBinCenter(LowBin)-hpost->GetBinCenter(MaxBin)); + } else { + Mode_Error = std::fabs(hpost->GetBinCenter(HighBin)-hpost->GetBinCenter(MaxBin)); + } + + return Mode_Error; +} diff --git a/mcmc/StatisticalUtils.h b/mcmc/StatisticalUtils.h index c199c299a..e610c93db 100644 --- a/mcmc/StatisticalUtils.h +++ b/mcmc/StatisticalUtils.h @@ -17,265 +17,64 @@ /// @brief Utility functions for statistical interpretations in MaCh3 /// @author Kamil Skwarczynski -// ************************** /// @brief KS: Following H. 
Jeffreys \cite jeffreys1998theory /// @param BayesFactor Obtained value of Bayes factor -inline std::string GetJeffreysScale(const double BayesFactor){ -// ************************** - std::string JeffreysScale = ""; - //KS: Get fancy Jeffreys Scale as I am to lazy to look into table every time - if(BayesFactor < 0) JeffreysScale = "Negative"; - else if( 5 > BayesFactor) JeffreysScale = "Barely worth mentioning"; - else if( 10 > BayesFactor) JeffreysScale = "Substantial"; - else if( 15 > BayesFactor) JeffreysScale = "Strong"; - else if( 20 > BayesFactor) JeffreysScale = "Very strong"; - else JeffreysScale = "Decisive"; +std::string GetJeffreysScale(const double BayesFactor); - return JeffreysScale; -} - -// ************************** /// @brief KS: Based on Table 1 in https://www.t2k.org/docs/technotes/435 /// @param BayesFactor Obtained value of Bayes factor -inline std::string GetDunneKaboth(const double BayesFactor){ -// ************************** - std::string DunneKaboth = ""; - //KS: Get fancy DunneKaboth Scale as I am to lazy to look into table every time - - if(2.125 > BayesFactor) DunneKaboth = "< 1 #sigma"; - else if( 20.74 > BayesFactor) DunneKaboth = "> 1 #sigma"; - else if( 369.4 > BayesFactor) DunneKaboth = "> 2 #sigma"; - else if( 15800 > BayesFactor) DunneKaboth = "> 3 #sigma"; - else if( 1745000 > BayesFactor) DunneKaboth = "> 4 #sigma"; - else DunneKaboth = "> 5 #sigma"; - - return DunneKaboth; -} +std::string GetDunneKaboth(const double BayesFactor); -// ********************* /// @brief KS: Convert sigma from normal distribution into percentage -inline double GetSigmaValue(const int sigma) { -// ********************* - double width = 0; - switch (std::abs(sigma)) - { - case 1: - width = 0.682689492137; - break; - case 2: - width = 0.954499736104; - break; - case 3: - width = 0.997300203937; - break; - case 4: - width = 0.999936657516; - break; - case 5: - width = 0.999999426697; - break; - case 6: - width = 0.999999998027; - break; - default: - MACH3LOG_ERROR("{} is unsupported value of sigma", sigma); - throw MaCh3Exception(__FILE__ , __LINE__ ); - break; - } - return width; -} +double GetSigmaValue(const int sigma); -// **************** /// @brief Get the Bayesian Information Criterion (BIC) or Schwarz information criterion (also SIC, SBC, SBIC) -inline double GetBIC(const double llh, const int data, const int nPars){ -// **************** - if(nPars == 0) - { - MACH3LOG_ERROR("You haven't passed number of model parameters as it is still zero"); - throw MaCh3Exception(__FILE__ , __LINE__ ); - } - const double BIC = double(nPars * logl(data) + llh); +double GetBIC(const double llh, const int data, const int nPars); - return BIC; -} - -// **************** /// @brief KS: See 14.3.10 in Numerical Recipes in C /// \cite press1992numerical -inline double GetNeffective(const int N1, const int N2) { -// **************** - const double Nominator = (N1+N2); - const double Denominator = (N1*N2); - const double N_e = Nominator/Denominator; - return N_e; -} +double GetNeffective(const int N1, const int N2); -// **************** /// @brief KS: For more see https://www.t2k.org/docs/technotes/429/TN429_v8#page=63 /// @param SampleNameVec vector of sample names /// @param PValVec pvalue for each sample /// @param Threshold pvalue accepted threshold, usually 5% -inline void CheckBonferoniCorrectedpValue(const std::vector& SampleNameVec, +void CheckBonferoniCorrectedpValue(const std::vector& SampleNameVec, const std::vector& PValVec, - const double Threshold = 0.05) { -// **************** 
- MACH3LOG_INFO(""); - if(SampleNameVec.size() != PValVec.size()) - { - MACH3LOG_ERROR("Size of vectors do not match"); - throw MaCh3Exception(__FILE__ , __LINE__ ); - } - const size_t NumberOfStatisticalTests = SampleNameVec.size(); - //KS: 0.05 or 5% is value used by T2K. - const double StatisticalSignificanceDown = Threshold / double(NumberOfStatisticalTests); - const double StatisticalSignificanceUp = 1 - StatisticalSignificanceDown; - MACH3LOG_INFO("Bonferroni-corrected statistical significance level: {:.2f}", StatisticalSignificanceDown); - - int Counter = 0; - for(unsigned int i = 0; i < SampleNameVec.size(); i++) - { - if( (PValVec[i] < 0.5 && PValVec[i] < StatisticalSignificanceDown) ) - { - MACH3LOG_INFO("Sample {} indicates disagreement between the model predictions and the data", SampleNameVec[i]); - MACH3LOG_INFO("Bonferroni-corrected statistical significance level: {:.2f} p-value: {:.2f}", StatisticalSignificanceDown, PValVec[i]); - Counter++; - } - else if( (PValVec[i] > 0.5 && PValVec[i] > StatisticalSignificanceUp) ) - { - MACH3LOG_INFO("Sample {} indicates disagreement between the model predictions and the data", SampleNameVec[i]); - MACH3LOG_INFO("Bonferroni-corrected statistical significance level: {:.2f} p-value: {:.2f}", StatisticalSignificanceUp, PValVec[i]); - Counter++; - } - } - if(Counter == 0) { - MACH3LOG_INFO("Every sample passed Bonferroni-corrected statistical significance level test"); - } else { - MACH3LOG_WARN("{} samples didn't pass Bonferroni-corrected statistical significance level test", Counter); - } - MACH3LOG_INFO(""); -} + const double Threshold = 0.05); -// **************** /// @param CumulativeData Value of CDF for data /// @param CumulativeMC Value of CDF for MC /// @param CumulativeJoint Value of CDF for joint data and MC distribution -inline double GetAndersonDarlingTestStat(const double CumulativeData, const double CumulativeMC, const double CumulativeJoint) { -// **************** - double ADstat = std::fabs(CumulativeData - CumulativeMC)/ std::sqrt(CumulativeJoint*(1 - CumulativeJoint)); - - if( std::isinf(ADstat) || std::isnan(ADstat)) return 0; - return ADstat; -} +double GetAndersonDarlingTestStat(const double CumulativeData, const double CumulativeMC, const double CumulativeJoint); -// **************** /// @brief KS: https://esjeevanand.uccollege.edu.in/wp-content/uploads/sites/114/2020/08/NON-PARAMTERIC-TEST-6.pdf -inline int GetNumberOfRuns(const std::vector& GroupClasifier) { -// **************** - int NumberOfRuns = 0; - int PreviousGroup = -999; +int GetNumberOfRuns(const std::vector& GroupClasifier); - //KS: If group changed increment run - for (unsigned int i = 0; i < GroupClasifier.size(); i++) - { - if(GroupClasifier[i] != PreviousGroup) - NumberOfRuns++; - PreviousGroup = GroupClasifier[i]; - } - - return NumberOfRuns; -} - -// **************** /// @brief KS: Calculate Beta parameter which will be different based on specified test statistic /// @param data Number of data events in a bin /// @param mc Number of MC events in a bin /// @param w2 Value of weight squared in a bin /// @param TestStat Test statistic based on which we calculate beta -inline double GetBetaParameter(const double data, const double mc, const double w2, TestStatistic TestStat) { -// **************** - double Beta = 0.0; - - if (TestStat == kDembinskiAbdelmottele) { - //the so-called effective count - const double k = mc*mc / w2; - //Calculate beta which is scaling factor between true and generated MC - Beta = (data + k) / (mc + k); - } - //KS: Below is 
technically only true for Cowan's BB, which will not be true for Poisson or IceCube, because why not... - else { - // CW: Barlow-Beeston uses fractional uncertainty on MC, so sqrt(sum[w^2])/mc - const double fractional = std::sqrt(w2)/mc; - // CW: -b/2a in quadratic equation - const double temp = mc*fractional*fractional-1; - // CW: b^2 - 4ac in quadratic equation - const double temp2 = temp*temp + 4*data*fractional*fractional; - if (temp2 < 0) { - MACH3LOG_ERROR("Negative square root in Barlow Beeston coefficient calculation!"); - throw MaCh3Exception(__FILE__ , __LINE__ ); - } - // CW: Solve for the positive beta - Beta = (-1*temp+std::sqrt(temp2))/2.; - } - return Beta; -} +double GetBetaParameter(const double data, const double mc, const double w2, TestStatistic TestStat); - -// ********************* /// @brief Based on \cite roberts2009adaptive /// @param EigenValues Eigen values of covariance matrix -inline double GetSubOptimality(const std::vector& EigenValues, const int TotalTarameters) { -// ********************* - double sum_eigenvalues_squared_inv = 0.0; - double sum_eigenvalues_inv = 0.0; - for (unsigned int j = 0; j < EigenValues.size(); j++) - { - //KS: IF Eigen values are super small skip them - //if(EigenValues[j] < 0.0000001) continue; - sum_eigenvalues_squared_inv += std::pow(EigenValues[j], -2); - sum_eigenvalues_inv += 1.0 / EigenValues[j]; - } - const double SubOptimality = TotalTarameters * sum_eigenvalues_squared_inv / std::pow(sum_eigenvalues_inv, 2); - return SubOptimality; -} - +double GetSubOptimality(const std::vector& EigenValues, const int TotalTarameters); -// ************************** /// @brief CW: Get Arithmetic mean from posterior /// @param hist histograms from which we extract arithmetic mean /// @param Mean Arithmetic Mean value /// @param Error Arithmetic Error value -inline void GetArithmetic(TH1D * const hist, double& Mean, double& Error) { -// ************************** - Mean = hist->GetMean(); - Error = hist->GetRMS(); -} +void GetArithmetic(TH1D * const hist, double& Mean, double& Error); -// ************************** /// @brief CW: Fit Gaussian to posterior /// @param hist histograms to which we fit gaussian /// @param gauss tf1 with gaussian, we pass pointer to make things faster /// @param Mean Gaussian Mean value /// @param Error Gaussian Error value -inline void GetGaussian(TH1D*& hist, TF1* gauss, double& Mean, double& Error) { -// ************************** - const double meanval = hist->GetMean(); - const double err = hist->GetRMS(); - const double peakval = hist->GetBinCenter(hist->GetMaximumBin()); +void GetGaussian(TH1D*& hist, TF1* gauss, double& Mean, double& Error); - // Set the range for the Gaussian fit - gauss->SetRange(meanval - 1.5*err , meanval + 1.5*err); - // Set the starting parameters close to RMS and peaks of the histograms - gauss->SetParameters(hist->GetMaximum()*err*std::sqrt(2*3.14), peakval, err); - - // Perform the fit - hist->Fit(gauss->GetName(),"Rq"); - hist->SetStats(0); - - Mean = gauss->GetParameter(1); - Error = gauss->GetParameter(2); -} - -// *************** /// @brief Get Highest Posterior Density (HPD) /// @param hist histograms from which we HPD /// @param Mean HPD Mean value @@ -283,339 +82,77 @@ inline void GetGaussian(TH1D*& hist, TF1* gauss, double& Mean, double& Error) { /// @param Error_p HPD Negative (left hand side) Error value /// @param Error_m HPD Positive (right hand side) Error value /// @param coverage What is defined coverage, by default 0.6827 (1 sigma) -inline void GetHPD(TH1D* const 
hist, double& Mean, double& Error, double& Error_p, double& Error_m, const double coverage = 0.6827) { -// **************** - // Get the bin which has the largest posterior density - const int MaxBin = hist->GetMaximumBin(); - // And it's value - const double peakval = hist->GetBinCenter(MaxBin); - - // The total integral of the posterior - const long double Integral = hist->Integral(); - //KS: and integral of left handed and right handed parts - const long double LowIntegral = hist->Integral(1, MaxBin-1) + hist->GetBinContent(MaxBin)/2.0; - const long double HighIntegral = hist->Integral(MaxBin+1, hist->GetNbinsX()) + hist->GetBinContent(MaxBin)/2.0; - - // Keep count of how much area we're covering - //KS: Take only half content of HPD bin as one half goes for right handed error and the other for left handed error - long double sum = hist->GetBinContent(MaxBin)/2.0; - - // Counter for current bin - int CurrBin = MaxBin; - while (sum/HighIntegral < coverage && CurrBin < hist->GetNbinsX()) { - CurrBin++; - sum += hist->GetBinContent(CurrBin); - } - const double sigma_p = std::fabs(hist->GetBinCenter(MaxBin)-hist->GetXaxis()->GetBinUpEdge(CurrBin)); - // Reset the sum - //KS: Take only half content of HPD bin as one half goes for right handed error and the other for left handed error - sum = hist->GetBinContent(MaxBin)/2.0; - - // Reset the bin counter - CurrBin = MaxBin; - // Counter for current bin - while (sum/LowIntegral < coverage && CurrBin > 1) { - CurrBin--; - sum += hist->GetBinContent(CurrBin); - } - const double sigma_m = std::fabs(hist->GetBinCenter(CurrBin)-hist->GetBinLowEdge(MaxBin)); - - // Now do the double sided HPD - //KS: Start sum from the HPD - sum = hist->GetBinContent(MaxBin); - int LowBin = MaxBin; - int HighBin = MaxBin; - long double LowCon = 0.0; - long double HighCon = 0.0; - - while (sum/Integral < coverage && (LowBin > 0 || HighBin < hist->GetNbinsX()+1)) - { - LowCon = 0.0; - HighCon = 0.0; - //KS:: Move further only if you haven't reached histogram end - if(LowBin > 1) - { - LowBin--; - LowCon = hist->GetBinContent(LowBin); - } - if(HighBin < hist->GetNbinsX()) - { - HighBin++; - HighCon = hist->GetBinContent(HighBin); - } - - // If we're on the last slice and the lower contour is larger than the upper - if ((sum+LowCon+HighCon)/Integral > coverage && LowCon > HighCon) { - sum += LowCon; - break; - // If we're on the last slice and the upper contour is larger than the lower - } else if ((sum+LowCon+HighCon)/Integral > coverage && HighCon >= LowCon) { - sum += HighCon; - break; - } else { - sum += LowCon + HighCon; - } - } - - double sigma_hpd = 0.0; - if (LowCon > HighCon) { - sigma_hpd = std::fabs(hist->GetBinLowEdge(LowBin)-hist->GetBinCenter(MaxBin)); - } else { - sigma_hpd = std::fabs(hist->GetXaxis()->GetBinUpEdge(HighBin)-hist->GetBinCenter(MaxBin)); - } +void GetHPD(TH1D* const hist, double& Mean, double& Error, double& Error_p, double& Error_m, const double coverage = 0.6827); - Mean = peakval; - Error = sigma_hpd; - Error_p = sigma_p; - Error_m = sigma_m; -} - -// *************** /// @brief KS: Get 1D histogram within credible interval, hpost_copy has to have the same binning, I don't do Copy() as this will lead to problems if this is used under multithreading /// @param hist histograms based on which we calculate credible interval /// @param coverage What is defined coverage, by default 0.6827 (1 sigma) -inline void GetCredibleInterval(TH1D* const hist, TH1D* hpost_copy, const double coverage = 0.6827) { -// *************** - if(coverage > 1) - { - 
MACH3LOG_ERROR("Specified Credible Interval is greater that 1 and equal to {} Should be between 0 and 1", coverage); - throw MaCh3Exception(__FILE__ , __LINE__ ); - } - //KS: Reset first copy of histogram - hpost_copy->Reset(""); - hpost_copy->Fill(0.0, 0.0); - - //KS: Temporary structure to be thread save - std::vector hist_copy(hist->GetXaxis()->GetNbins()+1); - std::vector hist_copy_fill(hist->GetXaxis()->GetNbins()+1); - for (int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i) - { - hist_copy[i] = hist->GetBinContent(i); - hist_copy_fill[i] = false; - } - - /// Loop over histogram bins with highest number of entries until covered 90 or 68.3% - const long double Integral = hist->Integral(); - long double sum = 0; +void GetCredibleInterval(TH1D* const hist, TH1D* hpost_copy, const double coverage = 0.6827); - while ((sum / Integral) < coverage) - { - /// Get bin of highest content and save the number of entries reached so far - int max_entry_bin = 0; - double max_entries = 0.; - for (int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i) - { - if (hist_copy[i] > max_entries) - { - max_entries = hist_copy[i]; - max_entry_bin = i; - } - } - /// Replace bin value by -1 so it is not looped over as being maximum bin again - hist_copy[max_entry_bin] = -1.; - hist_copy_fill[max_entry_bin] = true; - - sum += max_entries; - } - //KS: Now fill our copy only for bins which got included in coverage region - for(int i = 0; i <= hist->GetXaxis()->GetNbins(); ++i) - { - if(hist_copy_fill[i]) hpost_copy->SetBinContent(i, hist->GetBinContent(i)); - } -} - -// *************** /// @brief KS: Get 1D histogram within credible interval, hpost_copy has to have the same binning, I don't do Copy() as this will lead to problems if this is used under multithreading /// @param hist histograms based on which we calculate credible interval /// @param CredibleInSigmas Whether interval is in sigmas or percentage /// @param coverage What is defined coverage, by default 0.6827 (1 sigma) -inline void GetCredibleIntervalSig(TH1D* const hist, TH1D* hpost_copy, const bool CredibleInSigmas, const double coverage = 0.6827) { - // *************** - //KS: Slightly different approach depending if intervals are in percentage or sigmas - if(CredibleInSigmas) { - //KS: Convert sigmas into percentage - const double CredReg = GetSigmaValue(int(std::round(coverage))); - GetCredibleInterval(hist, hpost_copy, CredReg); - } else { - GetCredibleInterval(hist, hpost_copy, coverage); - } -} +void GetCredibleIntervalSig(TH1D* const hist, TH1D* hpost_copy, const bool CredibleInSigmas, const double coverage = 0.6827); -// *************** /// @brief KS: Set 2D contour within some coverage /// @param hist2D histograms based on which we calculate credible regions /// @param CredibleInSigmas Whether interval is in sigmas or percentage /// @param coverage What is defined coverage, by default 0.6827 (1 sigma) -inline void GetCredibleRegion(TH2D* const hist2D, const double coverage = 0.6827) { -// *************** - if(coverage > 1) - { - MACH3LOG_ERROR("Specified Credible Region is greater than 1 and equal to {:.2f} Should be between 0 and 1 {}", coverage); - throw MaCh3Exception(__FILE__ , __LINE__ ); - } - - //KS: Temporary structure to be thread save - std::vector> hist_copy(hist2D->GetXaxis()->GetNbins()+1, - std::vector(hist2D->GetYaxis()->GetNbins()+1)); - for (int i = 0; i <= hist2D->GetXaxis()->GetNbins(); ++i) { - for (int j = 0; j <= hist2D->GetYaxis()->GetNbins(); ++j) { - hist_copy[i][j] = hist2D->GetBinContent(i, j); - } - } +void 
GetCredibleRegion(TH2D* const hist2D, const double coverage = 0.6827); - /// Loop over histogram bins with highest number of entries until covered 90 or 68.3% - const long double Integral = hist2D->Integral(); - long double sum = 0; - - //We need to as ROOT requires array to set to contour - double Contour[1]; - while ((sum / Integral) < coverage) - { - /// Get bin of highest content and save the number of entries reached so far - int max_entry_bin_x = 0; - int max_entry_bin_y = 0; - double max_entries = 0.; - for (int i = 0; i <= hist2D->GetXaxis()->GetNbins(); ++i) - { - for (int j = 0; j <= hist2D->GetYaxis()->GetNbins(); ++j) - { - if (hist_copy[i][j] > max_entries) - { - max_entries = hist_copy[i][j]; - max_entry_bin_x = i; - max_entry_bin_y = j; - } - } - } - /// Replace bin value by -1 so it is not looped over as being maximum bin again - hist_copy[max_entry_bin_x][max_entry_bin_y] = -1.; - - sum += max_entries; - Contour[0] = max_entries; - } - hist2D->SetContour(1, Contour); -} - -// *************** /// @brief KS: Set 2D contour within some coverage /// @param hist2D histograms based on which we calculate credible regions /// @param coverage What is defined coverage, by default 0.6827 (1 sigma) -inline void GetCredibleRegionSig(TH2D* const hist2D, const bool CredibleInSigmas, const double coverage = 0.6827) { - // *************** - if(CredibleInSigmas) { - //KS: Convert sigmas into percentage - const double CredReg = GetSigmaValue(int(std::round(coverage))); - GetCredibleRegion(hist2D, CredReg); - } else { - GetCredibleRegion(hist2D, coverage); - } -} +void GetCredibleRegionSig(TH2D* const hist2D, const bool CredibleInSigmas, const double coverage = 0.6827); -// ********************* /// @brief Interquartile Range (IQR) /// @param hist histograms from which we IQR -inline double GetIQR(TH1D *Hist) { -// ********************* - if(Hist->Integral() == 0) return 0.0; - - constexpr double quartiles_x[3] = {0.25, 0.5, 0.75}; - double quartiles[3]; - - Hist->GetQuantiles(3, quartiles, quartiles_x); +double GetIQR(TH1D *Hist); - return quartiles[2] - quartiles[0]; -} - -// ******************** /// @brief Compute the Kullback-Leibler divergence between two TH2Poly histograms. /// /// @param DataPoly Pointer to the data histogram (TH2Poly). /// @param PolyMC Pointer to the Monte Carlo histogram (TH2Poly). /// @return The Kullback-Leibler divergence value. Returns 0 if the data or MC integral is zero. -inline double ComputeKLDivergence(TH2Poly* DataPoly, TH2Poly* PolyMC) { -// ********************* - double klDivergence = 0.0; - double DataIntegral = NoOverflowIntegral(DataPoly); - double MCIntegral = NoOverflowIntegral(PolyMC); - for (int i = 1; i < DataPoly->GetNumberOfBins()+1; ++i) - { - if (DataPoly->GetBinContent(i) > 0 && PolyMC->GetBinContent(i) > 0) { - klDivergence += DataPoly->GetBinContent(i) / DataIntegral * - std::log((DataPoly->GetBinContent(i) / DataIntegral) / ( PolyMC->GetBinContent(i) / MCIntegral)); - } - } - return klDivergence; -} -// ******************** +double ComputeKLDivergence(TH2Poly* DataPoly, TH2Poly* PolyMC); + /// @brief KS: Combine p-values using Fisher's method. /// /// @param pvalues A vector of individual p-values to combine. /// @return The combined p-value, representing the overall significance. 
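+/// @note The test statistic is -2 * sum_i ln(p_i), which under the null hypothesis follows a
+/// chi-square distribution with 2k degrees of freedom for k p-values.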
 /// @brief KS: Combine p-values using Fisher's method.
 ///
 /// @param pvalues A vector of individual p-values to combine.
 /// @return The combined p-value, representing the overall significance.
-inline double FisherCombinedPValue(const std::vector<double>& pvalues) {
-// ********************
-
-  double testStatistic = 0;
-  for(size_t i = 0; i < pvalues.size(); i++)
-  {
-    const double pval = std::max(0.00001, pvalues[i]);
-    testStatistic += -2.0 * std::log(pval);
-  }
-  // Degrees of freedom is twice the number of p-values
-  int degreesOfFreedom = int(2 * pvalues.size());
-  double pValue = TMath::Prob(testStatistic, degreesOfFreedom);
+double FisherCombinedPValue(const std::vector<double>& pvalues);
-  return pValue;
-}
-
-// ********************
 /// @brief Thin MCMC Chain, to save space and maintain low autocorrelations.
 ///
 /// @param FilePath Path to MCMC chain you want to thin
 /// @param ThinningCut every which entry you want to thin
 /// @cite 2011ThinningMCMC
 /// @warning Thinning is done over entry not steps, it may now work very well for merged chains
-inline void ThinningMCMC(const std::string& FilePath, const int ThinningCut) {
-// ********************
-  // Define the path for the temporary thinned file
-  std::string TempFilePath = "Thinned_" + FilePath;
-  int ret = system(("cp " + FilePath + " " + TempFilePath).c_str());
-  if (ret != 0) {
-    MACH3LOG_WARN("Error: system call to copy file failed with code {}", ret);
-  }
-
-  TFile *inFile = TFile::Open(TempFilePath.c_str(), "UPDATE");
-  if (!inFile || inFile->IsZombie()) {
-    MACH3LOG_ERROR("Error opening file: {}", TempFilePath);
-    throw MaCh3Exception(__FILE__, __LINE__);
-  }
-
-  TTree *inTree = inFile->Get<TTree>("posteriors");
-  if (!inTree) {
-    MACH3LOG_ERROR("Error: TTree 'posteriors' not found in file.");
-    inFile->ls();
-    inFile->Close();
-    throw MaCh3Exception(__FILE__, __LINE__);
-  }
+void ThinningMCMC(const std::string& FilePath, const int ThinningCut);
-  // Clone the structure without data
-  TTree *outTree = inTree->CloneTree(0);
-
-  // Loop over entries and apply thinning
-  Long64_t nEntries = inTree->GetEntries();
-  double retainedPercentage = (double(nEntries) / ThinningCut) / double(nEntries) * 100;
-  MACH3LOG_INFO("Thinning will retain {:.2f}% of chains", retainedPercentage);
-  for (Long64_t i = 0; i < nEntries; i++) {
-    if (i % (nEntries/10) == 0) {
-      MaCh3Utils::PrintProgressBar(i, nEntries);
-    }
-    if (i % ThinningCut == 0) {
-      inTree->GetEntry(i);
-      outTree->Fill();
-    }
-  }
-  inFile->WriteTObject(outTree, "posteriors", "kOverwrite");
-  inFile->Close();
-  delete inFile;
+/// @brief Compute the Z-score for a given value.
+///
+/// The Z-score indicates how many standard deviations a value is from the mean.
+/// A positive Z-score means the value is above the mean, while a negative Z-score
+/// means it is below the mean.
+///
+/// @param value The data point for which to compute the Z-score.
+/// @param mean The mean of the data set.
+/// @param stddev The standard deviation of the data set. Must be non-zero.
+/// @return The Z-score of the given value.
+/// @warning Ensure that stddev is not zero to avoid division by zero.
+double GetZScore(const double value, const double mean, const double stddev);
+
+/// @brief Compute the P-value from a given Z-score.
+///
+/// The P-value represents the probability of observing a value as extreme as
+/// the given Z-score under the null hypothesis of a standard normal distribution.
+///
+/// @param zScore The Z-score for which to compute the P-value.
+/// @return The P-value corresponding to the given Z-score.
+double GetPValueFromZScore(const double zScore);
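A hedged usage sketch of the two new helpers (numbers are illustrative; the one-sided versus two-sided convention of GetPValueFromZScore is fixed in the implementation, which is not part of this hunk):

    // How many standard deviations 120.0 lies from a mean of 100.0 with sigma 15.0
    const double z = GetZScore(120.0, 100.0, 15.0);   // (120 - 100) / 15 ≈ 1.33
    const double p = GetPValueFromZScore(z);          // probability of a value at least this extreme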
-
-  MACH3LOG_INFO("Thinned TTree saved and overwrote original in: {}", TempFilePath);
-}
+/// @brief Get the mode error from a TH1D
+/// @param hpost hist from which we extract mode error
+double GetModeError(TH1D* hpost);
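A short, hedged example of the thinning utility declared above (the chain file name is made up):

    // Keeps every 10th entry of the 'posteriors' TTree; the thinned copy is written
    // to "Thinned_MaCh3_Chain.root" while the original chain is left untouched.
    ThinningMCMC("MaCh3_Chain.root", 10);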
diff --git a/samplePDF/HistogramUtils.cpp b/samplePDF/HistogramUtils.cpp
index 0ccac03cb..faa4e4493 100644
--- a/samplePDF/HistogramUtils.cpp
+++ b/samplePDF/HistogramUtils.cpp
@@ -212,6 +212,44 @@ TH2Poly* NormalisePoly(TH2Poly *Histogram) {
   return HistCopy;
 }
 
+// ****************
+// Normalise a TH2Poly
+void NormaliseTH2Poly(TH2Poly* Histogram){
+// ****************
+  const double Integral = NoOverflowIntegral(Histogram);
+  for(int j = 1; j < Histogram->GetNumberOfBins()+1; j++)
+  {
+    Histogram->SetBinContent(j, Histogram->GetBinContent(j)/Integral);
+  }
+}
+
+// ****************
+// Make a ratio histogram
+template<class HistType>
+HistType* RatioHists(HistType *NumHist, HistType *DenomHist) {
+// ****************
+  HistType *NumCopy = static_cast<HistType*>(NumHist->Clone());
+  std::string title = std::string(DenomHist->GetName()) + "_ratio";
+  NumCopy->SetNameTitle(title.c_str(), title.c_str());
+  NumCopy->Divide(DenomHist);
+
+  return NumCopy;
+}
+
+// ****************
+// Make a ratio th2poly
+TH2Poly* RatioPolys(TH2Poly *NumHist, TH2Poly *DenomHist) {
+// ****************
+  TH2Poly *NumCopy = static_cast<TH2Poly*>(NumHist->Clone());
+  std::string title = std::string(DenomHist->GetName()) + "_ratio";
+  NumCopy->SetNameTitle(title.c_str(), title.c_str());
+
+  for(int i = 1; i < NumCopy->GetNumberOfBins()+1; ++i) {
+    NumCopy->SetBinContent(i,NumHist->GetBinContent(i)/DenomHist->GetBinContent(i));
+  }
+  return NumCopy;
+}
+
 // ****************
 TH2D* ConvertTH2PolyToTH2D(TH2Poly *poly, TH2D *h2dhist) {
 // ****************
@@ -368,6 +406,112 @@ void RemoveFitter(TH1D* hist, const std::string& name) {
   delete fitter;
 }
 
+// ****************
+// Make Poisson Fluctuation of TH1D hist
+void MakeFluctuatedHistogramStandard(TH1D *FluctHist, TH1D* PolyHist, TRandom3* rand){
+// ****************
+  // Make the Poisson fluctuated hist
+  FluctHist->Reset("");
+  FluctHist->Fill(0.0, 0.0);
+
+  for (int i = 1; i <= PolyHist->GetXaxis()->GetNbins(); ++i)
+  {
+    // Get the posterior predictive bin content
+    const double MeanContent = PolyHist->GetBinContent(i);
+    // Get a Poisson fluctuation of the content
+    const double Random = rand->PoissonD(MeanContent);
+    // Set the fluctuated histogram content to the Poisson variation of the posterior predictive histogram
+    FluctHist->SetBinContent(i,Random);
+  }
+}
+
+// ****************
+// Make Poisson Fluctuation of TH2Poly hist
+void MakeFluctuatedHistogramStandard(TH2Poly *FluctHist, TH2Poly* PolyHist, TRandom3* rand) {
+// ****************
+  // Make the Poisson fluctuated hist
+  FluctHist->Reset("");
+  FluctHist->Fill(0.0, 0.0, 0.0);
+
+  for (int i = 1; i < FluctHist->GetNumberOfBins()+1; ++i)
+  {
+    // Get the posterior predictive bin content
+    const double MeanContent = PolyHist->GetBinContent(i);
+    // Get a Poisson fluctuation of the content
+    const double Random = rand->PoissonD(MeanContent);
+    // Set the fluctuated histogram content to the Poisson variation of the posterior predictive histogram
+    FluctHist->SetBinContent(i,Random);
+  }
+}
+
+// ****************
+// Make Poisson Fluctuation of TH1D hist
+void MakeFluctuatedHistogramAlternative(TH1D* FluctHist, TH1D* PolyHist, TRandom3* rand){
+// ****************
+  // Make the Poisson fluctuated hist
+  FluctHist->Reset("");
+  FluctHist->Fill(0.0, 0.0);
+
+  const double evrate = PolyHist->Integral();
+  const int num = rand->Poisson(evrate);
+  int count = 0;
+  while(count < num)
+  {
+    const double candidate = PolyHist->GetRandom();
+    FluctHist->Fill(candidate);
+    count++;
+  }
+}
+
+// ****************
+//KS: ROOT developers were too lazy to develop GetRandom2 for TH2Poly, this implementation is based on:
+// https://root.cern.ch/doc/master/classTH2.html#a883f419e1f6899f9c4255b458d2afe2e
+int GetRandomPoly2(const TH2Poly* PolyHist, TRandom3* rand){
+// ****************
+  const int nbins = PolyHist->GetNumberOfBins();
+  const double r1 = rand->Rndm();
+
+  double* fIntegral = new double[nbins+2];
+  fIntegral[0] = 0.0;
+
+  //KS: This is a custom version of ComputeIntegral, once again ROOT was lazy :(
+  for (int i = 1; i < nbins+1; ++i)
+  {
+    fIntegral[i] = 0.0;
+    const double content = PolyHist->GetBinContent(i);
+    fIntegral[i] += fIntegral[i - 1] + content;
+  }
+  for (Int_t bin = 1; bin < nbins+1; ++bin) fIntegral[bin] /= fIntegral[nbins];
+  fIntegral[nbins+1] = PolyHist->GetEntries();
+
+  //KS: We return a single global bin number rather than X and Y, this way we can use SetBinContent rather than Fill, which is faster
+  int iBin = int(TMath::BinarySearch(nbins, fIntegral, r1));
+  //KS: Have to increment because TH2Poly has stupid offset arghh
+  iBin += 1;
+
+  delete[] fIntegral;
+  return iBin;
+}
+
+// ****************
+// Make Poisson fluctuation of TH2Poly hist
+void MakeFluctuatedHistogramAlternative(TH2Poly *FluctHist, TH2Poly* PolyHist, TRandom3* rand){
+// ****************
+  // Make the Poisson fluctuated hist
+  FluctHist->Reset("");
+  FluctHist->Fill(0.0, 0.0, 0.0);
+
+  const double evrate = NoOverflowIntegral(PolyHist);
+  const int num = rand->Poisson(evrate);
+  int count = 0;
+  while(count < num)
+  {
+    const int iBin = GetRandomPoly2(PolyHist, rand);
+    FluctHist->SetBinContent(iBin, FluctHist->GetBinContent(iBin) + 1);
+    count++;
+  }
+}
+
 // *************************
 TGraphAsymmErrors* MakeAsymGraph(TH1D* sigmaArrayLeft, TH1D* sigmaArrayCentr, TH1D* sigmaArrayRight, const std::string& title) {
 // *************************
@@ -399,6 +543,18 @@ TGraphAsymmErrors* MakeAsymGraph(TH1D* sigmaArrayLeft, TH1D* sigmaArrayCentr, TH
   return var;
 }
 
+// ****************
+//Fast and thread-safe fill of violin histogram; it assumes both histograms have the same binning
+void FastViolinFill(TH2D* violin, TH1D* hist_1d){
+// ****************
+  for (int x = 0; x < violin->GetXaxis()->GetNbins(); ++x)
+  {
+    const int y = violin->GetYaxis()->FindBin(hist_1d->GetBinContent(x+1));
+    violin->SetBinContent(x+1, y, violin->GetBinContent(x+1, y)+1);
+  }
+}
+
+
 // ****************
 //DB Get the Cherenkov momentum threshold in MeV
 double returnCherenkovThresholdMomentum(int PDG) {
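To show how the new HistogramUtils additions fit together, a hedged sketch of a posterior-predictive toy loop (object names, binning, and toy count are illustrative, assuming HistogramUtils.h is included):

    // Toy posterior-predictive spectrum
    TH1D PostPred("PostPred", "PostPred", 10, 0.0, 10.0);
    for (int i = 1; i <= 10; ++i) PostPred.SetBinContent(i, 20.0 + i);

    TH1D Fluct(PostPred);  // same binning, holds one Poisson toy at a time
    TH2D Violin("Violin", "Violin", 10, 0.0, 10.0, 60, 0.0, 60.0);  // X binning matches the spectrum

    TRandom3 rng(1234);
    for (int iToy = 0; iToy < 1000; ++iToy) {
      MakeFluctuatedHistogramStandard(&Fluct, &PostPred, &rng);  // Poisson-fluctuate every bin
      FastViolinFill(&Violin, &Fluct);                           // accumulate this toy into the violin
    }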
diff --git a/samplePDF/HistogramUtils.h b/samplePDF/HistogramUtils.h
index f71eb9787..c61174add 100644
--- a/samplePDF/HistogramUtils.h
+++ b/samplePDF/HistogramUtils.h
@@ -11,6 +12,7 @@
 #include "TGraphAsymmErrors.h"
 #include "TLorentzVector.h"
 #include "TObjString.h"
+#include "TRandom3.h"
 #pragma GCC diagnostic pop
 
 // MaCh3 inlcudes
@@ -38,11 +39,22 @@ TH2Poly* ConvertTH2DtoTH2Poly(TH2D *TH2Dhist);
 /// @brief WP: Helper to Normalise histograms
 TH2Poly* NormalisePoly(TH2Poly* Histogram);
+/// @brief Helper to Normalise histograms
+/// @param Histogram hist which we normalise
+void NormaliseTH2Poly(TH2Poly* Histogram);
+
 /// @brief WP: Helper to scale th2poly analogous to th2d scale with option "width"
 TH2Poly* PolyScaleWidth(TH2Poly *Histogram, double scale);
+
 /// @brief WP: Helper to calc integral of th2poly analogous to th2d integral with option "width"
 double PolyIntegralWidth(TH2Poly *Histogram);
+/// @brief Helper to make ratio histograms
+template<class HistType> HistType* RatioHists(HistType* NumHist, HistType* DenomHist);
+
+/// @brief Helper to make ratio of TH2Polys
+TH2Poly* RatioPolys(TH2Poly* NumPoly, TH2Poly* DenomPoly);
+
 /// @brief WP: Helper function to create TH2Poly histogram with uniform binning
 /// @param name This will be tittle of output histogram
 /// @param BinArray_x Bin edges for X axis
@@ -57,6 +69,19 @@ void CheckTH2PolyFileVersion(TFile *file);
 /// @brief KS: Remove fitted TF1 from hist to make comparison easier
 void RemoveFitter(TH1D* hist, const std::string& name);
+/// @brief Make Poisson fluctuation of TH1D hist using default fast method
+void MakeFluctuatedHistogramStandard(TH1D *FluctHist, TH1D* PolyHist, TRandom3* rand);
+/// @brief Make Poisson fluctuation of TH1D hist using slow method which is only for cross-check
+void MakeFluctuatedHistogramAlternative(TH1D *FluctHist, TH1D* PolyHist, TRandom3* rand);
+
+/// @brief Make Poisson fluctuation of TH2Poly hist using default fast method
+void MakeFluctuatedHistogramStandard(TH2Poly *FluctHist, TH2Poly* PolyHist, TRandom3* rand);
+/// @brief Make Poisson fluctuation of TH2Poly hist using slow method which is only for cross-check
+void MakeFluctuatedHistogramAlternative(TH2Poly *FluctHist, TH2Poly* PolyHist, TRandom3* rand);
+
+/// @brief KS: ROOT developers were too lazy to develop GetRandom2 for TH2Poly, this implementation is based on [link](https://root.cern.ch/doc/master/classTH2.html#a883f419e1f6899f9c4255b458d2afe2e)
+int GetRandomPoly2(const TH2Poly* PolyHist, TRandom3* rand);
+
 /// @brief Used by sigma variation, check how 1 sigma changes spectra
 /// @param sigmaArrayLeft sigma var hist at -1 or -3 sigma shift
 /// @param sigmaArrayCentr sigma var hist at prior values
 /// @param sigmaArrayRight sigma var hist at +1 or +3 sigma shift
@@ -65,6 +90,11 @@ void RemoveFitter(TH1D* hist, const std::string& name);
 /// @return A `TGraphAsymmErrors` object that visualizes the sigma variation of spectra, showing confidence intervals between different sigma shifts.
 TGraphAsymmErrors* MakeAsymGraph(TH1D* sigmaArrayLeft, TH1D* sigmaArrayCentr, TH1D* sigmaArrayRight, const std::string& title);
+/// @brief KS: Fill Violin histogram with entry from a toy
+/// @param violin hist that will be filled
+/// @param hist_1d reference hist from which we take entries to be filled
+void FastViolinFill(TH2D* violin, TH1D* hist_1d);
+
 
 /// @brief Helper to check if files exist or not
 inline std::string file_exists(std::string filename) {
   std::ifstream infile(filename.c_str());