Skip to content

Commit

Permalink
Implement simple MCMC Averaging
Browse files Browse the repository at this point in the history
  • Loading branch information
KSkwarczynski committed Nov 9, 2024
1 parent c35d411 commit 7fb5b6b
Show file tree
Hide file tree
Showing 7 changed files with 130 additions and 40 deletions.
2 changes: 2 additions & 0 deletions .mailmap
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ Henry Wallace <[email protected]> Henry Wallace <67589487+henry-wallace-ph
Henry Wallace <[email protected]> Henry Wallace <[email protected]>
Henry Wallace <[email protected]> Henry Wallace <[email protected]>
Henry Wallace <[email protected]> Henry Wallace <[email protected]>
Henry Wallace <[email protected]> Henry Wallace <[email protected]>
Henry Wallace <[email protected]> Henry Wallace <[email protected]>

# Ewan Miller
Ewan Miller <[email protected]> Ewan <[email protected]>
Expand Down
5 changes: 1 addition & 4 deletions Diagnostics/DiagMCMC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ void DiagMCMC(const std::string& inputFile, const std::string& config)
YAML::Node Settings = YAML::LoadFile(config);

// Make the processor
MCMCProcessor* Processor = new MCMCProcessor(inputFile);

auto Processor = std::make_unique<MCMCProcessor>(inputFile);
Processor->SetOutputSuffix("_MCMC_Diag");
//KS:Turn off plotting detector and some other setting
Processor->SetExcludedTypes(GetFromManager<std::vector<std::string>>(Settings["DiagMCMC"]["ExcludedTypes"], {""}));
Expand All @@ -27,8 +26,6 @@ void DiagMCMC(const std::string& inputFile, const std::string& config)

//KS: finally call main method
Processor->DiagMCMC();

delete Processor;
}

int main(int argc, char *argv[]) {
Expand Down
51 changes: 18 additions & 33 deletions Diagnostics/ProcessMCMC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ inline void ReweightPrior(MCMCProcessor* Processor);
/// @brief KS: Convert TMatrix to TH2D, mostly useful for making fancy plots
inline TH2D* TMatrixIntoTH2D(TMatrixDSym* Matrix, const std::string& title);
/// @brief KS: Perform KS test to check if two posteriors for the same parameter came from the same distribution
inline void KolmogorovSmirnovTest(MCMCProcessor** Processor, TCanvas* Posterior, TString canvasname);
inline void KolmogorovSmirnovTest(const std::vector<std::unique_ptr<MCMCProcessor>>& Processor, TCanvas* Posterior, TString canvasname);

int nFiles;
std::vector <std::string> FileNames;
Expand Down Expand Up @@ -96,7 +96,9 @@ void ProcessMCMC(const std::string& inputFile)
if(Settings["Thinning"])
{
bool DoThin = Settings["Thinning"][0].as<bool>();
if(DoThin) Processor->ThinMCMC(Settings["Thinning"][1].as<int>());
if(DoThin){
Processor->ThinMCMC(Settings["Thinning"][1].as<int>(), GetFromManager<bool>(Settings["Average"], false));
}
}
// Make the postfit
Processor->MakePostfit();
Expand Down Expand Up @@ -152,16 +154,15 @@ void MultipleProcessMCMC()
YAML::Node card_yaml = YAML::LoadFile(config.c_str());
YAML::Node Settings = card_yaml["ProcessMCMC"];

const Color_t PosteriorColor[] = {kBlue-1, kRed, kGreen+2};
//const Style_t PosteriorStyle[] = {kSolid, kDashed, kDotted};
constexpr Color_t PosteriorColor[] = {kBlue-1, kRed, kGreen+2};
//constexpr Style_t PosteriorStyle[] = {kSolid, kDashed, kDotted};
nFiles = int(FileNames.size());
MCMCProcessor** Processor;
Processor = new MCMCProcessor*[nFiles];
std::vector<std::unique_ptr<MCMCProcessor>> Processor(nFiles);
for (int ik = 0; ik < nFiles; ik++)
{
MACH3LOG_INFO("File for study: {}", FileNames[ik]);
// Make the processor
Processor[ik] = new MCMCProcessor(FileNames[ik]);
Processor[ik] = std::make_unique<MCMCProcessor>(FileNames[ik]);
Processor[ik]->SetOutputSuffix(("_" + std::to_string(ik)).c_str());

Processor[ik]->SetExcludedTypes(GetFromManager<std::vector<std::string>>(Settings["ExcludedTypes"], {""}));
Expand Down Expand Up @@ -212,8 +213,8 @@ void MultipleProcessMCMC()
for(int i = 0; i < Processor[0]->GetNParams(); ++i)
{
// This holds the posterior density
TH1D **hpost = new TH1D*[nFiles];
TLine **hpd = new TLine*[nFiles];
std::vector<TH1D*> hpost(nFiles);
std::vector<TLine*> hpd(nFiles);
hpost[0] = static_cast<TH1D *>(Processor[0]->GetHpost(i)->Clone());

bool Skip = false;
Expand All @@ -235,8 +236,6 @@ void MultipleProcessMCMC()
for (int ik = 0; ik < nFiles; ik++)
delete hpost[ik];

delete[] hpost;
delete[] hpd;
continue;
}
for (int ik = 0; ik < nFiles; ik++)
Expand All @@ -258,20 +257,20 @@ void MultipleProcessMCMC()
Processor[0]->GetNthParameter(i, Prior, PriorError, Title);

// Now make the TLine for the Asimov
TLine *Asimov = new TLine(Prior, hpost[0]->GetMinimum(), Prior, hpost[0]->GetMaximum());
auto Asimov = std::make_unique<TLine>(Prior, hpost[0]->GetMinimum(), Prior, hpost[0]->GetMaximum());
Asimov->SetLineColor(kRed-3);
Asimov->SetLineWidth(2);
Asimov->SetLineStyle(kDashed);

// Make a nice little TLegend
TLegend *leg = new TLegend(0.12, 0.7, 0.6, 0.97);
auto leg = std::make_unique<TLegend>(0.12, 0.7, 0.6, 0.97);
leg->SetTextSize(0.03f);
leg->SetFillColor(0);
leg->SetFillStyle(0);
leg->SetLineColor(0);
leg->SetLineStyle(0);
TString asimovLeg = Form("#splitline{Prior}{x = %.2f , #sigma = %.2f}", Prior, PriorError);
leg->AddEntry(Asimov, asimovLeg, "l");
leg->AddEntry(Asimov.get(), asimovLeg, "l");

for (int ik = 0; ik < nFiles; ik++)
{
Expand All @@ -296,16 +295,11 @@ void MultipleProcessMCMC()
leg->Draw("same");
Posterior->cd();
Posterior->Print(canvasname);

delete Asimov;
delete leg;
for (int ik = 0; ik < nFiles; ik++)
{
delete hpost[ik];
delete hpd[ik];
}
delete[] hpost;
delete[] hpd;
}//End loop over parameters

// Finally draw the parameter plot onto the PDF
Expand All @@ -321,8 +315,6 @@ void MultipleProcessMCMC()
Posterior->Print(canvasname);

delete Posterior;
for (int ik = 0; ik < nFiles; ik++) delete Processor[ik];
delete[] Processor;
}

// KS: Calculate Bayes factor for a given hypothesis, most informative are those related to osc params. However, it make relative easy interpretation for switch dials
Expand Down Expand Up @@ -627,7 +619,7 @@ TH2D* TMatrixIntoTH2D(TMatrixDSym* Matrix, const std::string& title)
}

//KS: Perform KS test to check if two posteriors for the same parameter came from the same distribution
void KolmogorovSmirnovTest(MCMCProcessor** Processor, TCanvas* Posterior, TString canvasname)
void KolmogorovSmirnovTest(const std::vector<std::unique_ptr<MCMCProcessor>>& Processor, TCanvas* Posterior, TString canvasname)
{
const Color_t CumulativeColor[] = {kBlue-1, kRed, kGreen+2};
const Style_t CumulativeStyle[] = {kSolid, kDashed, kDotted};
Expand Down Expand Up @@ -702,12 +694,9 @@ void KolmogorovSmirnovTest(MCMCProcessor** Processor, TCanvas* Posterior, TStrin
CumulativeDistribution[ik]->SetBinContent(NumberOfBins+1, 1.);
}

int* TestStatBin = new int[nFiles];
double* TestStatD = new double[nFiles];
TLine **LineD = new TLine*[nFiles];

for (int ik = 0 ; ik < nFiles; ik++) { TestStatBin[ik] = 0; TestStatD[ik] = -999;}

std::vector<int> TestStatBin(nFiles, 0);
std::vector<double> TestStatD(nFiles, -999);
std::vector<TLine *> LineD(nFiles);
//Find KS statistic
for (int ik = 1 ; ik < nFiles; ik++)
{
Expand Down Expand Up @@ -736,7 +725,7 @@ void KolmogorovSmirnovTest(MCMCProcessor** Processor, TCanvas* Posterior, TStrin
for (int ik = 0 ; ik < nFiles; ik++)
CumulativeDistribution[ik]->Draw("SAME");

TLegend *leg = new TLegend(0.15, 0.7, 0.5, 0.90);
auto leg = std::make_unique<TLegend>(0.15, 0.7, 0.5, 0.90);
leg->SetTextSize(0.04f);
for (int ik = 0; ik < nFiles; ik++)
leg->AddEntry(CumulativeDistribution[ik], TitleNames[ik].c_str(), "l");
Expand All @@ -755,7 +744,6 @@ void KolmogorovSmirnovTest(MCMCProcessor** Processor, TCanvas* Posterior, TStrin
Posterior->cd();
Posterior->Print(canvasname);

delete leg;
for (int ik = 0; ik < nFiles; ik++)
{
delete hpost[ik];
Expand All @@ -764,8 +752,5 @@ void KolmogorovSmirnovTest(MCMCProcessor** Processor, TCanvas* Posterior, TStrin
}
delete[] hpost;
delete[] CumulativeDistribution;
delete[] LineD;
delete[] TestStatBin;
delete[] TestStatD;
} //End loop over parameter
}
11 changes: 11 additions & 0 deletions Doc/bibliography.bib
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,14 @@ @article{2011ThinningMCMC
doi = {10.1111/j.2041-210X.2011.00131.x},
url = {https://doi.org/10.1111/j.2041-210X.2011.00131.x},
}

@article{nesterov2009primal,
author = {Nesterov, Yurii},
title = {Primal-dual subgradient methods for convex problems},
journal = {Mathematical Programming},
volume = {120},
pages = {221--259},
year = {2009},
doi = {10.1007/s10107-007-0149-x},
note = {Received 29 September 2005; Accepted 13 January 2007; Published 19 June 2007; Issue Date August 2009}
}
3 changes: 1 addition & 2 deletions mcmc/MCMCProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,8 +263,7 @@ void MCMCProcessor::MakePostfit() {
// nDraw is number of draws we want to do
for (int i = 0; i < nDraw; ++i)
{
if (i % (nDraw/5) == 0)
{
if (i % (nDraw/5) == 0) {
MaCh3Utils::PrintProgressBar(i, nDraw);
}
OutputFile->cd();
Expand Down
9 changes: 8 additions & 1 deletion mcmc/MCMCProcessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,14 @@ class MCMCProcessor {

/// @brief Thin MCMC Chain, to save space and maintain low autocorrelations.
/// @param ThinningCut every which entry you want to thin
inline void ThinMCMC(const int ThinningCut) {ThinningMCMC(MCMCFile+".root", ThinningCut);};
/// @param Average If true will perform MCMC averaging instead of thinning
inline void ThinMCMC(const int ThinningCut, const bool Average = false) {
if(Average){
AveragingMCMC(MCMCFile+".root", ThinningCut);
} else {
ThinningMCMC(MCMCFile+".root", ThinningCut);
}
};

/// @brief KS: Perform MCMC diagnostic including Autocorrelation, Trace etc.
void DiagMCMC();
Expand Down
89 changes: 89 additions & 0 deletions mcmc/StatisticalUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -584,3 +584,92 @@ inline void ThinningMCMC(const std::string& FilePath, const int ThinningCut) {

MACH3LOG_INFO("Thinned TTree saved and overwrote original in: {}", TempFilePath);
}


// ********************
/// @brief Average MCMC Chain entries, to reduce autocorrelation and improve estimate stability.
///
/// @param FilePath Path to the MCMC chain you want to average
/// @param AveragingCut Number of consecutive entries to average
/// @cite nesterov2009primal
/// @warning Averaging is done over entries, so this may not work well for merged chains or non-sequential entries.
/// @note The resulting file overwrites the original file.
inline void AveragingMCMC(const std::string& FilePath, const int AveragingCut) {
// ********************
if (AveragingCut <= 1) {
MACH3LOG_WARN("Invalid AveragingCut value ({}). It must be greater than 1.", AveragingCut);
throw MaCh3Exception(__FILE__, __LINE__);
}

// Define the path for the temporary averaged file
std::string TempFilePath = "Averaged_" + FilePath;
int ret = system(("cp " + FilePath + " " + TempFilePath).c_str());
if (ret != 0) {
MACH3LOG_WARN("Error: system call to copy file failed with code {}", ret);
}

TFile *inFile = TFile::Open(TempFilePath.c_str(), "UPDATE");
if (!inFile || inFile->IsZombie()) {
MACH3LOG_ERROR("Error opening file: {}", TempFilePath);
throw MaCh3Exception(__FILE__, __LINE__);
}

TTree *inTree = inFile->Get<TTree>("posteriors");
if (!inTree) {
MACH3LOG_ERROR("Error: TTree 'posteriors' not found in file.");
inFile->ls();
inFile->Close();
throw MaCh3Exception(__FILE__, __LINE__);
}

// Clone the structure without data
TTree *outTree = inTree->CloneTree(0);
Long64_t nEntries = inTree->GetEntries();
int nBranches = inTree->GetNbranches();

std::vector<double> AvgValues(nBranches, 0.0);
std::vector<double> BranchData(nBranches, 0.0);

// Loop over branches and set up data pointers
TObjArray *branches = inTree->GetListOfBranches();
for (int b = 0; b < branches->GetEntries(); ++b) {
TBranch *branch = static_cast<TBranch*>(branches->At(b));
inTree->SetBranchAddress(branch->GetName(), &BranchData[b]);
}

MACH3LOG_INFO("Averaging every {} entries. Total entries: {}", AveragingCut, nEntries);

int batchCount = 0;
for (Long64_t i = 0; i < nEntries; i += AveragingCut) {
// Reset to 0
std::fill(AvgValues.begin(), AvgValues.end(), 0.0);

/// @todo Here we perform normal mean. But in literature there are available weighted means and more complex operations
int entriesInBatch = 0;
for (int j = 0; j < AveragingCut && (i + j) < nEntries; ++j) {
inTree->GetEntry(i + j);
for (int b = 0; b < branches->GetEntries(); ++b) {
AvgValues[b] += BranchData[b];
}
++entriesInBatch;
}
for (int b = 0; b < branches->GetEntries(); ++b) {
AvgValues[b] /= entriesInBatch;
BranchData[b] = AvgValues[b];
}

outTree->Fill();

if (i % (nEntries/10) == 0) {
MaCh3Utils::PrintProgressBar(batchCount, nEntries / AveragingCut);
}
++batchCount;
}

// Write the averaged tree back to the file
inFile->WriteTObject(outTree, "posteriors", "kOverwrite");
inFile->Close();
delete inFile;

MACH3LOG_INFO("Averaged TTree saved and overwrote original in: {}", TempFilePath);
}

0 comments on commit 7fb5b6b

Please sign in to comment.