Skip to content

Commit

Permalink
cutflow bug fix
Browse files Browse the repository at this point in the history
  • Loading branch information
rkansal47 committed Mar 7, 2024
1 parent 7b62a0c commit f61a5ba
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 42 deletions.
137 changes: 106 additions & 31 deletions src/HHbbVV/postprocessing/PostProcessRes.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,9 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"ename": "ImportError",
"evalue": "cannot import name 'new_filters' from 'postprocessing' (/Users/raghav/Documents/CERN/hhbbww/HHbbVV/src/HHbbVV/postprocessing/postprocessing.py)",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 22\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CUT_MAX_VAL, ShapeVar\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mHHbbVV\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mhh_vars\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 8\u001b[0m years,\n\u001b[1;32m 9\u001b[0m data_key,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 20\u001b[0m LUMI,\n\u001b[1;32m 21\u001b[0m )\n\u001b[0;32m---> 22\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpostprocessing\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m res_shape_vars, new_filters, old_filters\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcollections\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m OrderedDict\n\u001b[1;32m 26\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n",
"\u001b[0;31mImportError\u001b[0m: cannot import name 'new_filters' from 'postprocessing' (/Users/raghav/Documents/CERN/hhbbww/HHbbVV/src/HHbbVV/postprocessing/postprocessing.py)"
]
}
],
"outputs": [],
"source": [
"import utils\n",
"import plotting\n",
Expand All @@ -39,7 +27,7 @@
" jmsr_shifts,\n",
" LUMI,\n",
")\n",
"from postprocessing import res_shape_vars, new_filters, old_filters\n",
"from postprocessing import res_shape_vars, load_filters\n",
"\n",
"from collections import OrderedDict\n",
"\n",
Expand All @@ -58,7 +46,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -68,7 +56,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -84,7 +72,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -98,7 +86,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -110,7 +98,7 @@
"# samples_dir = \"/eos/uscms/store/user/rkansal/bbVV/skimmer/Feb24\"\n",
"# nonres_signal_samples_dir = \"/eos/uscms/store/user/cmantill/bbVV/skimmer/Jun10/\"\n",
"# res_signal_samples_dir = \"/eos/uscms/store/user/rkansal/bbVV/skimmer/Apr11/\"\n",
"year = \"2016APV\"\n",
"year = \"2016\"\n",
"\n",
"date = \"24Mar6\"\n",
"plot_dir = MAIN_DIR / f\"plots/PostProcessing/{date}/\"\n",
Expand All @@ -134,9 +122,103 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loaded GluGluToHHTobbVV_node_cHHH1 : 100561 entries\n",
"Loaded VBF_HHTobbVV_CV_1_C2V_1_C3_1 : 9678 entries\n",
"Loaded NMSSM_XToYHTo2W2BTo4Q2B_MX-900_MY-80 : 79014 entries\n",
"Loaded NMSSM_XToYHTo2W2BTo4Q2B_MX-1200_MY-190 : 119555 entries\n",
"Loaded NMSSM_XToYHTo2W2BTo4Q2B_MX-2000_MY-125 : 154938 entries\n",
"Loaded NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-250 : 166706 entries\n",
"Loaded NMSSM_XToYHTo2W2BTo4Q2B_MX-4000_MY-150 : 166511 entries\n",
"Loaded QCD_HT300to500 : 8 entries\n",
"Loaded QCD_HT700to1000 : 79891 entries\n",
"Loaded QCD_HT1000to1500 : 54883 entries\n",
"Loaded QCD_HT2000toInf : 29965 entries\n",
"Loaded QCD_HT1500to2000 : 65548 entries\n",
"Loaded QCD_HT500to700 : 6597 entries\n",
"Loaded TTToSemiLeptonic : 563649 entries\n",
"Loaded TTToHadronic : 1012608 entries\n",
"Loaded ST_t-channel_top_4f_InclusiveDecays : 38358 entries\n",
"Loaded ST_tW_top_5f_inclusiveDecays : 8839 entries\n",
"Loaded ST_tW_antitop_5f_inclusiveDecays : 9586 entries\n",
"Loaded ST_t-channel_antitop_4f_InclusiveDecays : 19552 entries\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/raghav/Documents/CERN/hhbbww/HHbbVV/src/HHbbVV/postprocessing/postprocessing.py:913: UserWarning: No events for WJetsToQQ_HT-200to400!\n",
" warnings.warn(f\"No events for {sample}!\", stacklevel=1)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loaded WJetsToQQ_HT-800toInf : 142130 entries\n",
"Loaded WJetsToQQ_HT-600to800 : 33598 entries\n",
"Loaded WJetsToQQ_HT-400to600 : 293 entries\n",
"Loaded ZJetsToQQ_HT-200to400 : 1 entries\n",
"Loaded ZJetsToQQ_HT-400to600 : 593 entries\n",
"Loaded ZJetsToQQ_HT-600to800 : 51577 entries\n",
"Loaded ZJetsToQQ_HT-800toInf : 148021 entries\n",
"Loaded WW : 1894 entries\n",
"Loaded ZZ : 729 entries\n",
"Loaded WZ : 3146 entries\n",
"Loaded GluGluHToBB : 12018 entries\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/raghav/Documents/CERN/hhbbww/HHbbVV/src/HHbbVV/postprocessing/postprocessing.py:905: UserWarning: No parquet directory for VBFHToBB!\n",
" warnings.warn(f\"No parquet directory for {sample}!\", stacklevel=1)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loaded ZH_HToBB_ZToQQ : 29277 entries\n",
"Loaded WplusH_HToBB_WToQQ : 16362 entries\n",
"Loaded WminusH_HToBB_WToQQ : 21367 entries\n",
"Loaded ggZH_HToBB_ZToQQ : 22517 entries\n",
"Loaded ttHToBB : 193853 entries\n",
"Loaded VBFHToWWToAny_M-125_TuneCP5_withDipoleRecoil : 72 entries\n",
"Loaded HWminusJ_HToWW_M-125 : 1386 entries\n",
"Loaded GluGluHToWW_Pt-200ToInf_M-125 : 652 entries\n",
"Loaded HWplusJ_HToWW_M-125 : 1841 entries\n",
"Loaded ttHToNonbb_M125 : 52851 entries\n",
"Loaded HZJ_HToWW_M-125 : 10787 entries\n",
"Loaded JetHT_Run2016C_HIPM : 167582 entries\n",
"Loaded JetHT_Run2016D_HIPM : 287156 entries\n",
"Loaded JetHT_Run2016E_HIPM : 273569 entries\n",
"Loaded JetHT_Run2016B_ver2_HIPM : 423991 entries\n",
"Loaded JetHT_Run2016F_HIPM : 178714 entries\n"
]
},
{
"ename": "KeyError",
"evalue": "'VBFHbb'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[8], line 15\u001b[0m\n\u001b[1;32m 4\u001b[0m cutflow \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame(\n\u001b[1;32m 5\u001b[0m index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mlist\u001b[39m(samples\u001b[38;5;241m.\u001b[39mkeys()) \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mlist\u001b[39m(nonres_samples\u001b[38;5;241m.\u001b[39mkeys()) \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mlist\u001b[39m(res_samples\u001b[38;5;241m.\u001b[39mkeys())\n\u001b[1;32m 6\u001b[0m )\n\u001b[1;32m 8\u001b[0m events_dict \u001b[38;5;241m=\u001b[39m postprocessing\u001b[38;5;241m.\u001b[39mload_samples(\n\u001b[1;32m 9\u001b[0m samples_dir,\n\u001b[1;32m 10\u001b[0m {\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mnonres_samples, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mres_samples, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39msamples},\n\u001b[1;32m 11\u001b[0m year,\n\u001b[1;32m 12\u001b[0m load_filters,\n\u001b[1;32m 13\u001b[0m )\n\u001b[0;32m---> 15\u001b[0m \u001b[43mutils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd_to_cutflow\u001b[49m\u001b[43m(\u001b[49m\u001b[43mevents_dict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mPreselection\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfinalWeight\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcutflow\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 16\u001b[0m cutflow\n",
"File \u001b[0;32m~/Documents/CERN/hhbbww/HHbbVV/src/HHbbVV/postprocessing/utils.py:298\u001b[0m, in \u001b[0;36madd_to_cutflow\u001b[0;34m(events_dict, key, weight_key, cutflow)\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21madd_to_cutflow\u001b[39m(\n\u001b[1;32m 296\u001b[0m events_dict: \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, pd\u001b[38;5;241m.\u001b[39mDataFrame], key: \u001b[38;5;28mstr\u001b[39m, weight_key: \u001b[38;5;28mstr\u001b[39m, cutflow: pd\u001b[38;5;241m.\u001b[39mDataFrame\n\u001b[1;32m 297\u001b[0m ):\n\u001b[0;32m--> 298\u001b[0m cutflow[key] \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 299\u001b[0m np\u001b[38;5;241m.\u001b[39msum(events_dict[sample][weight_key])\u001b[38;5;241m.\u001b[39msqueeze() \u001b[38;5;28;01mfor\u001b[39;00m sample \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mlist\u001b[39m(cutflow\u001b[38;5;241m.\u001b[39mindex)\n\u001b[1;32m 300\u001b[0m ]\n",
"File \u001b[0;32m~/Documents/CERN/hhbbww/HHbbVV/src/HHbbVV/postprocessing/utils.py:299\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21madd_to_cutflow\u001b[39m(\n\u001b[1;32m 296\u001b[0m events_dict: \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, pd\u001b[38;5;241m.\u001b[39mDataFrame], key: \u001b[38;5;28mstr\u001b[39m, weight_key: \u001b[38;5;28mstr\u001b[39m, cutflow: pd\u001b[38;5;241m.\u001b[39mDataFrame\n\u001b[1;32m 297\u001b[0m ):\n\u001b[1;32m 298\u001b[0m cutflow[key] \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m--> 299\u001b[0m np\u001b[38;5;241m.\u001b[39msum(\u001b[43mevents_dict\u001b[49m\u001b[43m[\u001b[49m\u001b[43msample\u001b[49m\u001b[43m]\u001b[49m[weight_key])\u001b[38;5;241m.\u001b[39msqueeze() \u001b[38;5;28;01mfor\u001b[39;00m sample \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mlist\u001b[39m(cutflow\u001b[38;5;241m.\u001b[39mindex)\n\u001b[1;32m 300\u001b[0m ]\n",
"\u001b[0;31mKeyError\u001b[0m: 'VBFHbb'"
]
}
],
"source": [
"systematics = {year: {}}\n",
"\n",
Expand All @@ -146,17 +228,10 @@
")\n",
"\n",
"events_dict = postprocessing.load_samples(\n",
" sig_samples_dir,\n",
" {**nonres_samples, **res_samples},\n",
" year,\n",
" new_filters,\n",
")\n",
"\n",
"events_dict |= postprocessing.load_samples(\n",
" samples_dir,\n",
" samples,\n",
" {**nonres_samples, **res_samples, **samples},\n",
" year,\n",
" new_filters,\n",
" load_filters,\n",
")\n",
"\n",
"utils.add_to_cutflow(events_dict, \"Preselection\", \"finalWeight\", cutflow)\n",
Expand Down
3 changes: 2 additions & 1 deletion src/HHbbVV/postprocessing/bash_scripts/ControlPlots.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
MAIN_DIR="../../.."
TAG=""
resonant="--resonant"
samples="HHbbVV VBFHHbbVV NMSSM_XToYHTo2W2BTo4Q2B_MX-900_MY-80 NMSSM_XToYHTo2W2BTo4Q2B_MX-1200_MY-190 NMSSM_XToYHTo2W2BTo4Q2B_MX-2000_MY-125 NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-250 NMSSM_XToYHTo2W2BTo4Q2B_MX-4000_MY-150"
# samples="HHbbVV VBFHHbbVV NMSSM_XToYHTo2W2BTo4Q2B_MX-900_MY-80 NMSSM_XToYHTo2W2BTo4Q2B_MX-1200_MY-190 NMSSM_XToYHTo2W2BTo4Q2B_MX-2000_MY-125 NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-250 NMSSM_XToYHTo2W2BTo4Q2B_MX-4000_MY-150"
samples="HHbbVV VBFHHbbVV NMSSM_XToYHTo2W2BTo4Q2B_MX-900_MY-80"
hem2d="--HEM2d"

options=$(getopt -o "" --long "nonresonant,nohem2d,tag:" -- "$@")
Expand Down
4 changes: 2 additions & 2 deletions src/HHbbVV/postprocessing/bash_scripts/MassPlots.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ if [[ -z $TAG ]]; then
fi

# for year in 2016APV 2016 2017 2018
for year in 2016APV
for year in 2016
do
python -u postprocessing.py --control-plots --year $year --resonant \
--data-dir "${MAIN_DIR}/../data/skimmer/24Mar5AllYears" \
--sig-samples $samples \
--plot-dir "${MAIN_DIR}/plots/PostProcessing/$TAG" \
--no-filters --control-plot-vars "bbFatJetParticleNetMass" "bbFatJetMsd" "VVFatJetParticleNetMass" "VVFatJetMsd"
--mass-plots
done
47 changes: 39 additions & 8 deletions src/HHbbVV/postprocessing/postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class Region:
bins=[20, 50, 250],
significance_dir="bin",
),
ShapeVar(var="bbFatJetMsd", label=r"$m^{bb}_{msd}$ (GeV)", bins=[20, 0, 300]),
ShapeVar(var="bbFatJetMsd", label=r"$m^{bb}_{msd}$ (GeV)", bins=[20, 50, 250]),
ShapeVar(var="bbFatJetParticleNetMD_Txbb", label=r"$T^{bb}_{Xbb}$", bins=[20, 0.8, 1]),
ShapeVar(var="VVFatJetEta", label=r"$\eta^{VV}$", bins=[20, -2.4, 2.4]),
ShapeVar(var="VVFatJetPt", label=r"$p^{VV}_T$ (GeV)", bins=[20, 300, 2300]),
Expand Down Expand Up @@ -124,6 +124,15 @@ class Region:
]


# Shape variables for the msd vs mreg mass-comparison plots only; these are
# used in place of control_plot_vars when --mass-plots is set. Both the bb and
# VV jets get their ParticleNet ("reg") and msd masses, all on the same
# [30, 0, 300] binning.
mass_plot_vars = [
    ShapeVar(
        var=f"{jet}FatJet{branch}",
        label=rf"$m^{{{jet}}}_{{{tag}}}$ (GeV)",
        bins=[30, 0, 300],
    )
    for jet in ("bb", "VV")
    for branch, tag in (("ParticleNetMass", "reg"), ("Msd", "msd"))
]


def get_nonres_selection_regions(
year: str,
txbb_wp: str = "MP",
Expand Down Expand Up @@ -432,24 +441,26 @@ def main(args):
# Control plots
if args.control_plots:
print("\nMaking control plots\n")
plot_vars = mass_plot_vars if args.mass_plots else control_plot_vars
if len(args.control_plot_vars):
for var in control_plot_vars.copy():
for var in plot_vars.copy():
if var.var not in args.control_plot_vars:
control_plot_vars.remove(var)
plot_vars.remove(var)

print("Plotting: ", [var.var for var in control_plot_vars])
print("Plotting: ", [var.var for var in plot_vars])

control_plots(
events_dict,
bb_masks,
sig_keys,
control_plot_vars,
plot_vars,
args.control_plots_dir,
args.year,
bg_keys=args.bg_keys,
sig_scale_dict={"HHbbVV": 1e5, "VBFHHbbVV": 2e6} | {key: 2e4 for key in res_sig_keys},
# sig_splits=sig_splits,
HEM2d=args.HEM2d,
same_ylim=args.mass_plots,
show=False,
)

Expand Down Expand Up @@ -910,7 +921,11 @@ def load_samples(

# no parquet directory?
if not parquet_path.exists():
warnings.warn(f"No parquet directory for {sample}!", stacklevel=1)
if not (
(year == "2016" and sample.endswith("HIPM"))
or (year == "2016APV" and not sample.endswith("HIPM"))
): # don't complain about 2016/HIPM mismatch
warnings.warn(f"No parquet directory for {sample}!", stacklevel=1)
continue

# print(f"Loading {sample}")
Expand Down Expand Up @@ -973,7 +988,7 @@ def _load_samples(args, samples, sig_samples, cutflow):

print("Samples: ", list(events_dict.keys()))

utils.add_to_cutflow(events_dict, "Pre-selection", "weight", cutflow)
utils.add_to_cutflow(events_dict, "Pre-selection", "finalWeight", cutflow)

print("")
# print weighted sample yields
Expand Down Expand Up @@ -1406,6 +1421,7 @@ def control_plots(
combine_pdf: bool = True,
HEM2d: bool = False,
plot_significance: bool = False,
same_ylim: bool = False,
show: bool = False,
log: tuple[bool, str] = "both",
):
Expand All @@ -1416,6 +1432,9 @@ def control_plots(
control_plot_vars (list[ShapeVar]): List of variables to plot, each a ShapeVar giving the
variable name (var), axis label (label), and binning (bins=[num bins, min, max]).
sig_splits: split up signals into different plots (in case there are too many for one)
HEM2d: whether to plot 2D hists of FatJet phi vs eta for bb and VV jets as a check for HEM cleaning.
plot_significance: whether to plot the significance as well as the ratio plot.
same_ylim: whether to use the same y-axis limits for all plots.
log: True or False if plot on log scale or not - or "both" if both.
"""

Expand All @@ -1440,6 +1459,8 @@ def control_plots(
events_dict, shape_var, bb_masks, weight_key=weight_key, selection=selection
)

ylim = np.max([h.values() for h in hists.values()]) if same_ylim else None

if HEM2d and year == "2018":
hists["HEM2d"] = hists_HEM2d(events_dict, bb_masks, weight_key, selection)

Expand Down Expand Up @@ -1474,7 +1495,7 @@ def control_plots(
significance_dir=shape_var.significance_dir,
show=show,
log=log,
ylim=None if not log else 1e15,
ylim=ylim if not log else 1e15,
)
merger_control_plots.append(name)

Expand Down Expand Up @@ -1946,6 +1967,12 @@ def save_templates(
add_bool_arg(parser, "resonant", "for resonant or nonresonant", default=False)
add_bool_arg(parser, "vbf", "non-resonant VBF or inclusive", default=False)
add_bool_arg(parser, "control-plots", "make control plots", default=False)
add_bool_arg(
parser,
"mass-plots",
"make mass comparison plots (filters will automatically be turned off)",
default=False,
)
add_bool_arg(parser, "bdt-plots", "make bdt sculpting plots", default=False)
add_bool_arg(parser, "templates", "save m_bb templates using bdt cut", default=False)
add_bool_arg(
Expand Down Expand Up @@ -2071,4 +2098,8 @@ def save_templates(
# can't do HEM cleaning for non-resonant until BDT is re-inferenced
args.hem_cleaning = bool(args.resonant or args.vbf)

if args.mass_plots:
args.control_plots = True
args.filters = False

main(args)

0 comments on commit f61a5ba

Please sign in to comment.