Skip to content

Commit

Permalink
first pass with only rate systs
Browse files Browse the repository at this point in the history
  • Loading branch information
rkansal47 committed Oct 4, 2023
1 parent a500c94 commit b8cce58
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 59 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ src/*
!src/HHbbVV/postprocessing/**
src/HHbbVV/postprocessing/data/*
src/HHbbVV/postprocessing/*.pkl
src/HHbbVV/postprocessing/*.txt
src/HHbbVV/postprocessing/cards
src/HHbbVV/postprocessing/templates_old
src/HHbbVV/postprocessing/condor_templates
Expand Down
111 changes: 70 additions & 41 deletions src/HHbbVV/postprocessing/CreateDatacard.py
Original file line number Diff line number Diff line change
Expand Up @@ -940,8 +940,6 @@ def createDatacardAlphabet(args, templates_dict, templates_summed, shape_vars):

logging.info("rendering combine model")

os.system(f"mkdir -p {args.cards_dir}")

out_dir = (
os.path.join(str(args.cards_dir), args.model_name)
if args.model_name is not None
Expand Down Expand Up @@ -981,7 +979,10 @@ def fill_yields(channels, channels_summed):
"processes_rates": [],
}

rates_dict = {}

for channel in channels:
rates_dict[channel] = {}
for i, key in enumerate(sig_keys + bg_keys + [qcd_data_key]):
channel_bins_dict["bins_x_processes"].append(channel)
channel_bins_dict["processes_index"].append(i + 1 - len(sig_keys))
Expand All @@ -991,23 +992,86 @@ def fill_yields(channels, channels_summed):
else:
channel_bins_dict["processes_per_bin"].append(mc_samples[key])
channel_bins_dict["processes_rates"].append(channels_summed[channel][key].value)
rates_dict[channel][key] = channels_summed[channel][key].value

for key, arr in channel_bins_dict.items():
datacard_dict[key] = join_with_padding(arr)

return datacard_dict


def get_systematics_abcd(channels, channels_summed):
channel_systs_dict = {}

for region in channels:
channel_systs_dict[region] = {}

pass_region = region.startswith("pass")
region_nosidebands = region.split("_sidebands")[0]
sideband_str = "_sidebands" if region.endswith("_sidebands") else ""

logging.info("starting region: %s" % region)

for sample_name, card_name in mc_samples.items():
systs_dict = {}
channel_systs_dict[region][sample_name] = systs_dict

# rate systematics
for skey, syst in nuisance_params.items():
if sample_name not in syst.samples or (not pass_region and syst.pass_only):
continue

val, val_down = syst.value, syst.value_down
if syst.diff_regions:
val = val[region_nosidebands]
val_down = val_down[region_nosidebands] if val_down is not None else val_down
if syst.diff_samples:
val = val[sample_name]
val_down = val_down[sample_name] if val_down is not None else val_down

systs_dict[skey] = (val, val_down)

syst_strs = []
for skey in (
nuisance_params.keys() + corr_year_shape_systs.keys() + uncorr_year_shape_systs.keys()
):
sstr = f"{skey:<92}lnN "
vals = []
for channel in channels:
for i, key in enumerate(sig_keys + bg_keys + [qcd_data_key]):
if key == qcd_data_key or skey not in channel_systs_dict[channel][key]:
vals.append("-")
else:
val, val_down = channel_systs_dict[channel][key][skey]
val_str = val if val_down is None else f"{val}/{val_down}"

sstr += join_with_padding(vals)

syst_str = syst_strs.join("\n")

return syst_str


def createDatacardABCD(args, templates_dict, templates_summed, shape_vars):
# A, B, C, D (in order)
channels = ["pass", "pass_sidebands", "fail", "fail_sidebands"] # analogous to regions
channels_dict, channels_summed = get_channels(templates_dict, templates_summed, shape_vars[0])

datacard_dict = fill_yields(channels, channels_summed)
datacard_dict["systematics"] = get_systematics_abcd(channels, channels_summed)

for region in channels:
region_templates = channels_summed[region]
out_dir = (
os.path.join(str(args.cards_dir), args.model_name)
if args.model_name is not None
else args.cards_dir
)

with open(f"{out_dir}/datacard.txt", "w") as f:
f.write(helpers.abcd_datacard_template.substitute(datacard_dict))

return

for region in channels:
pass_region = region.startswith("pass")
region_nosidebands = region.split("_sidebands")[0]
sideband_str = "_sidebands" if region.endswith("_sidebands") else ""
Expand Down Expand Up @@ -1040,40 +1104,6 @@ def createDatacardABCD(args, templates_dict, templates_summed, shape_vars):

# no MC stats?? I removed it

# rate systematics
for skey, syst in nuisance_params.items():
if sample_name not in syst.samples or (not pass_region and syst.pass_only):
continue

logging.info(f"Getting {skey} rate")
if skey == "CMS_bbWW_hadronic_triggerEffSF_uncorrelated":
logging.info(
f"\nSkipping rate systematics calc for {skey} of {sample_name} in {region} region\n"
)
continue

val, val_down = syst.value, syst.value_down
if syst.diff_regions:
region_name = region_noblinded
val = val[region_name]
val_down = val_down[region_name] if val_down is not None else val_down
if syst.diff_samples:
val = val[sample_name]
val_down = val_down[sample_name] if val_down is not None else val_down

# sample.setParamEffect(param, val, effect_down=val_down)
print("513 rate systematics val, val down", val, val_down)
collected_data.append(
{
"type": "rate systematics",
"region": region,
"sample_name": sample_name,
"skey": skey,
"val": val,
"val_down": val_down,
}
)

# correlated shape systematics
for skey, syst in corr_year_shape_systs.items():
if sample_name not in syst.samples or (not pass_region and syst.pass_only):
Expand Down Expand Up @@ -1237,9 +1267,6 @@ def createDatacardABCD(args, templates_dict, templates_summed, shape_vars):

syst_string = "\n".join(syst_list)

# fill datacard with args
with open("datacard.templ.txt", "r") as f:
templ = Template(f.read())
out_file = f"/home/users/annava/CMSSW_12_3_4/src/HiggsAnalysis/CombinedLimit/datacards/datacard.templ_{sig_key}.txt"
with open(out_file, "w") as f:
f.write(templ.substitute(datacard_dict))
Expand All @@ -1263,6 +1290,8 @@ def main(args):
for i, axis in enumerate(sample_templates.axes[1:])
]

os.makedirs(args.cards_dir, exist_ok=True)

dc_args = [args, templates_dict, templates_summed, shape_vars]
if args.vbf:
createDatacardABCD(*dc_args)
Expand Down
17 changes: 0 additions & 17 deletions src/HHbbVV/postprocessing/datacard.templ.txt

This file was deleted.

26 changes: 25 additions & 1 deletion src/HHbbVV/postprocessing/datacardHelpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from hist import Hist
import utils

from hh_vars import years as all_years


#################################################
# Common
Expand All @@ -24,13 +26,14 @@ class Syst:
# if both, region should be the higher level of the dictionary
value: Union[float, Dict[str, float]] = None
value_down: Union[float, Dict[str, float]] = None # if None assumes symmetric effect

# if the value is different for different regions or samples
diff_regions: bool = False
diff_samples: bool = False

samples: List[str] = None # samples affected by it
# in case of uncorrelated unc., which years to split into
uncorr_years: List[str] = field(default_factory=lambda: years)
uncorr_years: List[str] = field(default_factory=lambda: all_years)
pass_only: bool = False # is it applied only in the pass regions

def __post_init__(self):
Expand Down Expand Up @@ -250,6 +253,27 @@ def get_effect_updown(values_nominal, values_up, values_down, mask, logger):
#################################################


abcd_datacard_template = """
imax $num_bins
jmax $num_bgs
kmax *
---------------
bin $bins
observation $observations
------------------------------
bin $bins_x_processes
process $processes_per_bin
process $processes_index
rate $processes_rates
------------------------------
single_A rateParam $binA bkg (@0*@2/@1) single_B,single_C,single_D
single_B rateParam $binB bkg $obsB
single_C rateParam $binC bkg $obsC
single_D rateParam $binD bkg $obsD
$systematics
"""


def join_with_padding(items, padding: int = 36):
"""Joins items into a string, padded by ``padding`` spaces"""
ret = f"{{:<{padding}}}" * len(items)
Expand Down

0 comments on commit b8cce58

Please sign in to comment.