Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Fixes for EFT combination & RobustHesse parallelisation #906

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions interface/MultiDimFit.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ class MultiDimFit : public FitterAlgoBase {
static bool robustHesse_;
static std::string robustHesseLoad_;
static std::string robustHesseSave_;
static int robustHesseSplit_;
static unsigned robustHesseIdx_;

static std::string saveSpecifiedFuncs_;
static std::string saveSpecifiedNuis_;
Expand Down
4 changes: 4 additions & 0 deletions interface/RobustHesse.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ class RobustHesse {
void ProtectArgSet(RooArgSet const& set);
void ProtectVars(std::vector<std::string> const& names);

void SetSplitJob(int N, unsigned id);

int hesse();

void WriteOutputFile(std::string const& outputFileName) const;
Expand Down Expand Up @@ -98,6 +100,8 @@ class RobustHesse {
double minNllForStencils_;
double maxNllForStencils_;
unsigned maxRemovalsFromHessian_;
int nParallelJobs_;
unsigned int jobIdx_;

bool doRescale_;

Expand Down
3 changes: 2 additions & 1 deletion interface/RooEFTScalingFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,10 @@ class RooEFTScalingFunction : public RooAbsReal {
protected:
std::map<std::string,double> coeffs_;
RooListProxy terms_;
std::map< std::vector<RooAbsReal *>, double> vcomponents_;
mutable std::map< std::vector<RooAbsReal *>, double> vcomponents_; //! do not serialize
double offset_;
virtual Double_t evaluate() const ;
void init() const;
private:
ClassDef(RooEFTScalingFunction,1)
};
Expand Down
7 changes: 7 additions & 0 deletions python/DatacardParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,13 @@ def addDatacardParserOptions(parser):
action="store_true",
help="Assign RooUniform pdf for flatParam NPs",
)
parser.add_option(
"--X-physics-model-process-filter",
dest="physicsModelProcessFilter",
default=False,
action="store_true",
help="Don't build PDFs for processes whose yield is set to zero by the physics model",
)


def strip(l):
Expand Down
2 changes: 1 addition & 1 deletion python/ModelTools.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,9 @@ def setPhysics(self, physicsModel):
self.physics.setModelBuilder(self)

def doModel(self, justCheckPhysicsModel=False):
self.physics.doParametersOfInterest()
if not justCheckPhysicsModel:
self.doObservables()
self.physics.doParametersOfInterest()

# set a group attribute on POI variables
poiIter = self.out.set("POI").createIterator()
Expand Down
3 changes: 3 additions & 0 deletions python/ShapeTools.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,9 @@ def prepareAllShapes(self):
continue
if p != self.options.dataname and self.DC.exp[b][p] == 0:
continue
if p != self.options.dataname and self.options.physicsModelProcessFilter:
if self.physics.getYieldScale(b, p) == 0:
continue
shape = self.getShape(b, p)
norm = 0
if shape == None: # counting experiment
Expand Down
43 changes: 43 additions & 0 deletions src/AsimovUtils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "../interface/CloseCoutSentry.h"
#include "../interface/CascadeMinimizer.h"
#include "../interface/Logger.h"
#include "../interface/ProfilingTools.h"

RooAbsData *asimovutils::asimovDatasetNominal(RooStats::ModelConfig *mc, double poiValue, int verbose) {
RooArgSet poi(*mc->GetParametersOfInterest());
Expand All @@ -38,6 +39,7 @@ RooAbsData *asimovutils::asimovDatasetWithFit(RooStats::ModelConfig *mc, RooAbsD
RooArgSet poi(*mc->GetParametersOfInterest());
RooRealVar *r = dynamic_cast<RooRealVar *>(poi.first());
r->setConstant(true); r->setVal(poiValue);
bool mvg_gen_obs = runtimedef::get("ASIMOV_MULTIVAR_GAUSS_GEN");
{
CloseCoutSentry sentry(verbose < 3);
if (mc->GetNuisanceParameters()) {
Expand Down Expand Up @@ -96,6 +98,47 @@ RooAbsData *asimovutils::asimovDatasetWithFit(RooStats::ModelConfig *mc, RooAbsD
for (RooAbsArg *a = (RooAbsArg *) iter->Next(); a != 0; a = (RooAbsArg *) iter->Next()) {
RooAbsPdf *cterm = dynamic_cast<RooAbsPdf *>(a);
if (!cterm) throw std::logic_error("AsimovUtils: a factor of the nuisance pdf is not a Pdf!");
if (mvg_gen_obs && cterm->InheritsFrom("RooMultiVarGaussian")) {
std::unique_ptr<RooArgSet> comps(cterm->getComponents());
std::unique_ptr<RooArgSet> vars(cterm->getParameters(poi));
auto iter = vars->fwdIterator();
for (RooAbsArg *a = iter.next(); a != nullptr; a = iter.next()) {
RooRealVar *rrv = dynamic_cast<RooRealVar *>(a);
if (rrv == nullptr) continue;
if (verbose > 1)
std::cout << "var: " << a->GetName() <<
", constant? " << rrv->isConstant() <<
", comp? " << (comps->find(*rrv) != nullptr) <<
", gobs? " << (gobs.find(*rrv) != nullptr) <<
", poi? " << (poi.find(*rrv) != nullptr) << std::endl;
if (poi.find(*rrv)) continue;
std::string vname = rrv->GetName();
if (vname.size() > 3 && vname.substr(vname.size() - 3) == "_In") {
auto match = comps->find(vname.substr(0, vname.size() - 3).c_str());
if (match != nullptr) {
if (verbose > 1)
std::cout << " --> matched to " << match->ClassName() << " " << match->GetName() << std::endl;
RooAbsReal *rfunc = dynamic_cast<RooAbsReal *>(match);
if (rfunc != nullptr) {
if (rfunc->getVal() == rrv->getVal()) {
if (verbose > 1)
std::cout << " --> the two already have the same " "value. nothing to do." << std::endl;
} else {
if (verbose > 1)
std::cout << " --> set " << a->GetName() << ", currently at " << rrv->getVal() << ", to " << rfunc->getVal() << std::endl;
rrv->setVal(rfunc->getVal());
}
}
if (!gobs.find(*rrv)) {
if (verbose > 1)
std::cout << " --> added to the global observables: " << std::endl;
gobs.add(*rrv);
}
}
}
}
continue;
}
if (!cterm->dependsOn(nuis)) continue; // dummy constraints
if (typeid(*cterm) == typeid(RooUniform)) continue;
std::unique_ptr<RooArgSet> cpars(cterm->getParameters(&gobs));
Expand Down
8 changes: 7 additions & 1 deletion src/MultiDimFit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ float MultiDimFit::centeredRange_ = -1.0;
bool MultiDimFit::robustHesse_ = false;
std::string MultiDimFit::robustHesseLoad_ = "";
std::string MultiDimFit::robustHesseSave_ = "";

int MultiDimFit::robustHesseSplit_ = 0;
unsigned MultiDimFit::robustHesseIdx_ = 0;

std::string MultiDimFit::saveSpecifiedFuncs_;
std::string MultiDimFit::saveSpecifiedIndex_;
Expand Down Expand Up @@ -110,6 +111,8 @@ MultiDimFit::MultiDimFit() :
("robustHesse", boost::program_options::value<bool>(&robustHesse_)->default_value(robustHesse_), "Use a more robust calculation of the hessian/covariance matrix")
("robustHesseLoad", boost::program_options::value<std::string>(&robustHesseLoad_)->default_value(robustHesseLoad_), "Load the pre-calculated Hessian")
("robustHesseSave", boost::program_options::value<std::string>(&robustHesseSave_)->default_value(robustHesseSave_), "Save the calculated Hessian")
("robustHesseSplit", boost::program_options::value<int>(&robustHesseSplit_)->default_value(robustHesseSplit_), "Split the Hessian calculation into N jobs")
("robustHesseIdx", boost::program_options::value<unsigned>(&robustHesseIdx_)->default_value(robustHesseIdx_), "Set job index (0..N-1) in split mode")
;
}

Expand Down Expand Up @@ -242,6 +245,9 @@ bool MultiDimFit::runSpecific(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooS
if (robustHesseLoad_ != "") {
robustHesse.LoadHessianFromFile(robustHesseLoad_);
}
if (robustHesseSplit_ > 0) {
robustHesse.SetSplitJob(robustHesseSplit_, robustHesseIdx_);
}
robustHesse.hesse();
if (saveFitResult_) {
res.reset(robustHesse.GetRooFitResult(res.get()));
Expand Down
43 changes: 39 additions & 4 deletions src/RobustHesse.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ RobustHesse::RobustHesse(RooAbsReal &nll, unsigned verbose) : nll_(&nll), verbos
maxNllForStencils_ = 0.105;
doRescale_ = true;
maxRemovalsFromHessian_ = 20;
nParallelJobs_ = 0;
jobIdx_ = 0;
initialize();
}

Expand All @@ -53,6 +55,11 @@ void RobustHesse::initialize() {
ReplaceVars(allVars);
}

void RobustHesse::SetSplitJob(int N, unsigned id) {
nParallelJobs_ = N;
jobIdx_ = id;
}

double RobustHesse::deltaNLL() {
return nll_->getVal() - nll0_;
}
Expand Down Expand Up @@ -96,6 +103,10 @@ int RobustHesse::setParameterStencil(unsigned i) {

double valLo = x - rrv->getError();
double valHi = x + rrv->getError();
if (rrv->getError() == 0.) {
valLo = x - 1E-3;
valHi = x + 1E-3;
}

// Am I near a boundary?
double boundLo = rrv->getMin();
Expand All @@ -116,7 +127,7 @@ int RobustHesse::setParameterStencil(unsigned i) {
std::cout << ">> Parameter " << rrv->GetName() << " is too close to the upper bound: \n";
rrv->Print();
}
valHi = boundHi + 1E-7;
valHi = boundHi - 1E-7;
}

double dNllLo = deltaNLL({i}, {valLo});
Expand Down Expand Up @@ -332,7 +343,7 @@ int RobustHesse::hesse() {

std::vector<Var> validStencilVars;
for (unsigned i = 0; i < cVars_.size(); ++i) {
if (cVars_[i].stencil.size() == 3) {
if (cVars_[i].stencil.size() >= 3) {
validStencilVars.push_back(cVars_[i]);
} else {
invalidStencilVars_.push_back(cVars_[i]);
Expand All @@ -341,7 +352,7 @@ int RobustHesse::hesse() {
ReplaceVars(validStencilVars);

for (unsigned i = 0; i < cVars_.size(); ++i) {
if (cVars_[i].stencil.size() == 3) {
if (cVars_[i].stencil.size() >= 3) {
cVars_[i].d1coeffs = getFdCoeffs(1, cVars_[i].stencil);
cVars_[i].d2coeffs = getFdCoeffs(2, cVars_[i].stencil);
if (verbosity_ > 0) printf("%-80s %-10.3f %-10.3f %-10.3f | %-10.3f %-10.3f %-10.3f | %-10.3f %-10.3f %-10.3f\n",
Expand All @@ -356,7 +367,10 @@ int RobustHesse::hesse() {
hessian_ = std::unique_ptr<TMatrixDSym>(new TMatrixDSym(cVars_.size()));
unsigned ntotal = (((cVars_.size() * cVars_.size()) - cVars_.size()) / 2) + cVars_.size();
unsigned idx = 0;

int subrange = 0;
if (nParallelJobs_ > 0) {
subrange = (ntotal + nParallelJobs_ - 1) / nParallelJobs_; // ceil to int
}
if (loadFile_ != "") {
TFile fin(loadFile_.c_str());
*hessian_ = *((TMatrixDSym*)gDirectory->Get("hessian"));
Expand All @@ -366,6 +380,12 @@ int RobustHesse::hesse() {
if (idx % 100 == 0) {
if (verbosity_ > 0) std::cout << " - Done " << idx << "/" << ntotal << " terms (" << nllEvals_ << " evals, of which " << nllEvalsCached_ << " cached)\n";
}

// if (nParallelJobs_ > 0 && !((idx % nParallelJobs_) == jobIdx_)) {
if (nParallelJobs_ > 0 && !(idx >= (jobIdx_ * subrange) && idx < ((jobIdx_ + 1) * subrange))) {
++idx;
continue;
}
double term = 0.;
if (i == j) {
for (unsigned k = 0; k < cVars_[i].stencil.size(); ++k) {
Expand Down Expand Up @@ -400,6 +420,17 @@ int RobustHesse::hesse() {
if (saveFile_ != "") {
TFile fout(saveFile_.c_str(), "RECREATE");
gDirectory->WriteObject(hessian_.get(), "hessian");
std::cout << "saving hessian" << "\n";

RooArgList arglist("floatParsFinal");
RooArgList rescalefactors("rescaleFactors");
for (unsigned i = 0; i < cVars_.size(); ++i) {
arglist.addClone(*cVars_[i].v);
RooRealVar rescale(cVars_[i].v->GetName(),cVars_[i].v->GetName(),cVars_[i].rescale);
rescalefactors.addClone(rescale);
}
gDirectory->WriteObject(arglist.snapshot(), "floatParsFinal");
gDirectory->WriteObject(rescalefactors.snapshot(), "rescaleFactors");
}


Expand Down Expand Up @@ -507,8 +538,11 @@ void RobustHesse::WriteOutputFile(std::string const& outputFileName) const {
TH2F h_cor("correlation", "correlation", cVars_.size(), 0, cVars_.size(), cVars_.size(), 0,
cVars_.size());
RooArgList arglist("floatParsFinal");
RooArgList rescalefactors("rescaleFactors");
for (unsigned i = 0; i < cVars_.size(); ++i) {
arglist.addClone(*cVars_[i].v);
RooRealVar rescale(cVars_[i].v->GetName(),cVars_[i].v->GetName(),cVars_[i].rescale);
rescalefactors.addClone(rescale);
RooRealVar *rrv = dynamic_cast<RooRealVar*>(arglist.at(i));
rrv->setError(std::sqrt((*covariance_)[i][i]) * cVars_[i].rescale);
h_cov.GetXaxis()->SetBinLabel(i + 1, cVars_[i].v->GetName());
Expand All @@ -531,6 +565,7 @@ void RobustHesse::WriteOutputFile(std::string const& outputFileName) const {
gDirectory->WriteObject(&h_cov, "h_covariance");
gDirectory->WriteObject(&h_cor, "h_correlation");
gDirectory->WriteObject(arglist.snapshot(), "floatParsFinal");
gDirectory->WriteObject(rescalefactors.snapshot(), "rescaleFactors");
fout.Close();
}

Expand Down
28 changes: 15 additions & 13 deletions src/RooEFTScalingFunction.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@ RooEFTScalingFunction::RooEFTScalingFunction(const char *name, const char *title
}
terms_.add(*rar);
}
init();
}

void RooEFTScalingFunction::init() const {
// Loop over elements in mapping: add components to vector depending on string
for( auto const& x : coeffs ) {
for( auto const& x : coeffs_ ) {
TString term_name = x.first;
double term_prefactor = x.second;

Expand All @@ -30,30 +33,30 @@ RooEFTScalingFunction::RooEFTScalingFunction(const char *name, const char *title

// Squared-quadratic components
if( second_term == "2" ){
if( terms.find(first_term) ){
if( terms_.find(first_term) ){
std::vector<RooAbsReal *> vterms;
RooAbsReal *rar1 = dynamic_cast<RooAbsReal *>( terms.find(first_term) );
RooAbsReal *rar2 = dynamic_cast<RooAbsReal *>( terms.find(first_term) );
RooAbsReal *rar1 = dynamic_cast<RooAbsReal *>( terms_.find(first_term) );
RooAbsReal *rar2 = dynamic_cast<RooAbsReal *>( terms_.find(first_term) );
vterms.push_back(rar1);
vterms.push_back(rar2);
vcomponents_.emplace(vterms,term_prefactor);
}
} else{
// Cross-quadratic components
if( terms.find(first_term) && terms.find(second_term) ){
if( terms_.find(first_term) && terms_.find(second_term) ){
std::vector<RooAbsReal *> vterms;
RooAbsReal *rar1 = dynamic_cast<RooAbsReal *>( terms.find(first_term) );
RooAbsReal *rar2 = dynamic_cast<RooAbsReal *>( terms.find(second_term) );
RooAbsReal *rar1 = dynamic_cast<RooAbsReal *>( terms_.find(first_term) );
RooAbsReal *rar2 = dynamic_cast<RooAbsReal *>( terms_.find(second_term) );
vterms.push_back(rar1);
vterms.push_back(rar2);
vcomponents_.emplace(vterms,term_prefactor);
}
}
} else {

if( terms.find(term_name) ) {
if( terms_.find(term_name) ) {
std::vector<RooAbsReal *> vterms;
RooAbsReal *rar = dynamic_cast<RooAbsReal *>( terms.find(term_name) );
RooAbsReal *rar = dynamic_cast<RooAbsReal *>( terms_.find(term_name) );
vterms.push_back(rar);
vcomponents_.emplace(vterms,term_prefactor);
}
Expand All @@ -65,15 +68,14 @@ RooEFTScalingFunction::RooEFTScalingFunction(const RooEFTScalingFunction& other,
RooAbsReal(other, name),
coeffs_(other.coeffs_),
terms_("!terms",this,other.terms_),
vcomponents_(other.vcomponents_),
offset_(other.offset_)
{
}
{ }

Double_t RooEFTScalingFunction::evaluate() const
{
if (vcomponents_.empty()) {
return offset_;
init();
// return offset_;
}
double ret = offset_;
for (auto const &x : vcomponents_) {
Expand Down