Skip to content

Commit

Permalink
ggf templates update
Browse files Browse the repository at this point in the history
  • Loading branch information
rkansal47 committed Mar 8, 2024
1 parent 982cf41 commit c0e56a1
Show file tree
Hide file tree
Showing 20 changed files with 549 additions and 21 deletions.
11 changes: 10 additions & 1 deletion src/HHbbVV/postprocessing/PostProcessRes.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,19 @@
"metadata": {},
"outputs": [],
"source": [
"events = pd.read_parquet(f\"{sig_samples_dir}/{year}/GluGluToHHTobbVV_node_cHHH1/parquet\")\n",
"events = pd.read_parquet(f\"{samples_dir}/{year}/GluGluToHHTobbVV_node_cHHH1/parquet\")\n",
"events"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"list(events.columns)"
]
},
{
"attachments": {},
"cell_type": "markdown",
Expand Down
31 changes: 22 additions & 9 deletions src/HHbbVV/postprocessing/TrainBDT.py
Original file line number Diff line number Diff line change
Expand Up @@ -677,15 +677,22 @@ def do_inference(
type=int,
)

"""
Varying between 0.01 - 1 showed no significant difference
https://hhbbvv.nrp-nautilus.io/bdt/24_03_07_new_samples_lr_0.01/
https://hhbbvv.nrp-nautilus.io/bdt/24_03_07_new_samples_nestimators_10000/
https://hhbbvv.nrp-nautilus.io/bdt/24_03_07_new_samples_lr_1/
"""
parser.add_argument("--learning-rate", default=0.1, help="learning rate", type=float)
"""
hyperparam optimizations show max depth 3 or 4 is optimal:
https://hhbbvv.nrp-nautilus.io/bdt/23_11_02_rem_feats_3_min_delta_0.0005_max_depth_3/
https://hhbbvv.nrp-nautilus.io/bdt/23_11_02_rem_feats_3_min_delta_0.0005_max_depth_4/
https://hhbbvv.nrp-nautilus.io/bdt/23_11_02_rem_feats_3_min_delta_0.0005_max_depth_5/
hyperparam optimizations show max depth 5 is optimal:
https://hhbbvv.nrp-nautilus.io/bdt/24_03_07_new_samples_nestimators_10000/
https://hhbbvv.nrp-nautilus.io/bdt/24_03_07_new_samples_max_depth_4/
https://hhbbvv.nrp-nautilus.io/bdt/24_03_07_new_samples_max_depth_5/
https://hhbbvv.nrp-nautilus.io/bdt/24_03_07_new_samples_max_depth_6/
unclear if gain from 4 is enough to justify increasing complexity
"""
parser.add_argument("--max-depth", default=3, help="max depth of each tree", type=int)
parser.add_argument("--max-depth", default=5, help="max depth of each tree", type=int)
"""
hyperparam optimizations show min child weight has ~no effect
https://hhbbvv.nrp-nautilus.io/bdt/23_05_10_multiclass_max_depth_3_min_child_1_n_1000/
Expand All @@ -697,15 +704,17 @@ def do_inference(
help="minimum weight required to keep splitting (higher is more conservative)",
type=float,
)
"""
this just needs to be higher than the # rounds needed for early-stopping to kick in
"""
# This just needs to be higher than the # rounds needed for early-stopping to kick in
parser.add_argument(
"--n-estimators", default=1000, help="max number of trees to keep adding", type=int
"--n-estimators", default=1_000, help="max number of trees to keep adding", type=int
)

parser.add_argument("--rem-feats", default=0, help="remove N lowest importance feats", type=int)

"""
Slightly worse to use a single tagger score
https://hhbbvv.nrp-nautilus.io/bdt/24_03_07_new_samples_single_tagger_var
"""
add_bool_arg(
parser, "all-tagger-vars", "Use all tagger outputs vs. single THWWvsT score", default=True
)
Expand All @@ -731,6 +740,10 @@ def do_inference(
parser.add_argument(
"--early-stopping-rounds", default=5, help="early stopping rounds", type=int
)
"""
Increasing this consistently decreased performance
e.g. https://hhbbvv.nrp-nautilus.io/bdt/24_03_07_new_samples_min_delta_0.0001/
"""
parser.add_argument(
"--early-stopping-min-delta",
default=0.0,
Expand Down
5 changes: 2 additions & 3 deletions src/HHbbVV/postprocessing/bash_scripts/ControlPlots.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@
MAIN_DIR="../../.."
TAG=""
resonant="--resonant"
# samples="HHbbVV VBFHHbbVV NMSSM_XToYHTo2W2BTo4Q2B_MX-900_MY-80 NMSSM_XToYHTo2W2BTo4Q2B_MX-1200_MY-190 NMSSM_XToYHTo2W2BTo4Q2B_MX-2000_MY-125 NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-250 NMSSM_XToYHTo2W2BTo4Q2B_MX-4000_MY-150"
samples="HHbbVV VBFHHbbVV NMSSM_XToYHTo2W2BTo4Q2B_MX-900_MY-80"
samples="HHbbVV VBFHHbbVV NMSSM_XToYHTo2W2BTo4Q2B_MX-900_MY-80 NMSSM_XToYHTo2W2BTo4Q2B_MX-1200_MY-190 NMSSM_XToYHTo2W2BTo4Q2B_MX-2000_MY-125 NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-250 NMSSM_XToYHTo2W2BTo4Q2B_MX-4000_MY-150"
hem2d="--HEM2d"

options=$(getopt -o "" --long "nonresonant,nohem2d,tag:" -- "$@")
Expand All @@ -28,7 +27,7 @@ while true; do
case "$1" in
--nonresonant)
resonant=""
samples="HHbbVV qqHH_CV_1_C2V_1_kl_1_HHbbVV"
samples="HHbbVV VBFHHbbVV"
;;
--nohem2d)
hem2d=""
Expand Down
51 changes: 51 additions & 0 deletions src/HHbbVV/postprocessing/bash_scripts/NonresTemplates.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/bin/bash
# shellcheck disable=SC2086,SC2043

####################################################################################################
# Script for creating nonresonant templates + BDT score control plots
# Author: Raghav Kansal
#
# Usage: NonresTemplates.sh --tag <TAG>
#   --tag   (required) label appended to the plot and template output directories
#
# Runs `postprocessing.py` (resolved relative to the current working directory) once per year
# listed in the loop at the bottom of this file.
####################################################################################################

MAIN_DIR="../../.."
data_dir="$MAIN_DIR/../data/skimmer/24Mar6AllYearsBDTVars"
# BDT inference outputs live under the skimmed-data directory
bdt_preds_dir="$data_dir/24_03_07_new_samples_max_depth_5/inferences"
TAG=""


# getopt(1) prints its own diagnostic for unknown options / missing arguments and returns
# non-zero; stop here instead of continuing with a partially parsed command line.
if ! options=$(getopt -o "" --long "tag:" -- "$@"); then
    echo "Usage: $0 --tag <TAG>" >&2
    exit 1
fi
eval set -- "$options"

while true; do
    case "$1" in
        --tag)
            shift
            TAG=$1
            ;;
        --)
            shift
            break
            ;;
        *)
            # Defensive catch-all: getopt output should always end in `--`, but never loop
            # forever on an unexpected token.
            echo "Invalid option: $1" >&2
            exit 1
            ;;
    esac
    shift
done

if [[ -z $TAG ]]; then
    echo "Tag required using the --tag option. Exiting"
    exit 1
fi

# for year in 2016APV 2016 2017 2018
for year in 2016APV
do
    python -u postprocessing.py --year "$year" --data-dir "$data_dir" --templates \
        --bdt-preds-dir "$bdt_preds_dir" \
        --control-plots --control-plot-vars "BDTScore" \
        --plot-dir "${MAIN_DIR}/plots/PostProcessing/$TAG" \
        --template-dir "templates/$TAG" --plot-shifts
done
33 changes: 25 additions & 8 deletions src/HHbbVV/postprocessing/postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,9 +405,9 @@ def main(args):
derive_variables(
events_dict,
bb_masks,
nonres_vars=not args.resonant or args.control_plots,
nonres_vars=args.vbf or args.control_plots,
vbf_vars=args.vbf,
do_jshifts=args.templates, # only need shifts if making templates
do_jshifts=False, # only need shifts for BDT pre-processing
)

# args has attr if --control-plots arg was set
Expand Down Expand Up @@ -440,16 +440,27 @@ def main(args):
plot_vars.remove(var)

print("Plotting: ", [var.var for var in plot_vars])
if args.resonant:
p_sig_keys = sig_keys
sig_scale_dict = {"HHbbVV": 1e5, "VBFHHbbVV": 2e6} | {key: 2e4 for key in res_sig_keys}
else:
p_sig_keys = plot_sig_keys_nonres
sig_scale_dict = {
"HHbbVV": 1e5,
"VBFHHbbVV": 2e5,
"qqHH_CV_1_C2V_0_kl_1_HHbbVV": 2e3,
"qqHH_CV_1_C2V_2_kl_1_HHbbVV": 2e3,
}

control_plots(
events_dict,
bb_masks,
sig_keys,
p_sig_keys,
plot_vars,
args.control_plots_dir,
args.year,
bg_keys=args.bg_keys,
sig_scale_dict={"HHbbVV": 1e5, "VBFHHbbVV": 2e6} | {key: 2e4 for key in res_sig_keys},
sig_scale_dict=sig_scale_dict,
# sig_splits=sig_splits,
HEM2d=args.HEM2d,
same_ylim=args.mass_plots,
Expand All @@ -464,6 +475,11 @@ def main(args):
)

if args.templates:
if args.resonant:
sig_scale_dict = None
else:
sig_scale_dict = {key: 10 for key in plot_sig_keys_nonres}

for wps in scan_wps: # if not scanning, this will just be a single WP
cutstr = "_".join([f"{cut}_{wp}" for cut, wp in zip(scan_cuts, wps)]) if scan else ""
template_dir = args.template_dir / cutstr / args.templates_name
Expand Down Expand Up @@ -503,7 +519,7 @@ def main(args):
for jshift in jshifts:
print(jshift)
plot_dir = (
args.templates_plots_dir / cutstr / f"{'jshifts/' if jshift != '' else ''}"
args.templates_plots_dir / cutstr / f"{'jshifts' if jshift != '' else ''}"
if args.plot_dir != ""
else ""
)
Expand All @@ -521,6 +537,7 @@ def main(args):
plot_dir=plot_dir,
prev_cutflow=cutflow,
# sig_splits=sig_splits,
sig_scale_dict=sig_scale_dict,
weight_shifts=weight_shifts,
jshift=jshift,
blind_pass=bool(args.resonant),
Expand Down Expand Up @@ -796,9 +813,6 @@ def _make_dirs(args, scan, scan_cuts, scan_wps):
parents=True, exist_ok=True
)

if args.bdt_preds_dir != "" and args.bdt_preds_dir is not None:
args.bdt_preds_dir = Path(args.bdt_preds_dir)


def _normalize_weights(
events: pd.DataFrame, totals: dict, sample: str, isData: bool, variations: bool = True
Expand Down Expand Up @@ -2048,6 +2062,9 @@ def parse_args():
if not args.signal_data_dirs:
args.signal_data_dirs = [args.data_dir]

if args.bdt_preds_dir != "" and args.bdt_preds_dir is not None:
args.bdt_preds_dir = Path(args.bdt_preds_dir)

if args.bdt_preds_dir == "" and not args.resonant:
args.bdt_preds_dir = f"{args.data_dir}/inferences/"
elif args.resonant:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
,Pre-selection,QCD SF,LP SF,bbFatJetPt >= 300,VVFatJetPt >= 300,0.8 ≤ bbFatJetParticleNetMD_Txbb < 0.9737
HHbbVV,1.108127280079604,1.108127280079604,1.108127280079604,1.1005594143174928,1.0940812392241102,0.3689082675908363
ggHH_kl_2p45_kt_1_HHbbVV,0.8015758690220187,0.8015758690220187,0.8015758690220187,0.7966181927909652,0.7929251357407407,0.27643952958598983
ggHH_kl_5_kt_1_HHbbVV,0.8397792227107982,0.8397792227107982,0.8397792227107982,0.8351909835840614,0.8317756967171981,0.37385989120403884
ggHH_kl_0_kt_1_HHbbVV,1.4337517442784347,1.4337517442784347,1.4337517442784347,1.4226156102926775,1.4129101659183219,0.4851970922348211
VBFHHbbVV,0.02157241124523907,0.02157241124523907,0.02157241124523907,0.021438896099242835,0.021315873451069035,0.007693806502845373
qqHH_CV_1_C2V_0_kl_1_HHbbVV,11.015595087816134,11.015595087816134,11.015595087816134,10.992704362069707,10.974097162767547,2.9996084084430152
qqHH_CV_1p5_C2V_1_kl_1_HHbbVV,18.523340727906472,18.523340727906472,18.523340727906472,18.47731328698665,18.44388166206678,5.094048669628073
qqHH_CV_1_C2V_1_kl_2_HHbbVV,0.021501518954477026,0.021501518954477026,0.021501518954477026,0.021387774292197015,0.02128691499756289,0.010393282892467392
qqHH_CV_1_C2V_2_kl_1_HHbbVV,9.95063113617899,9.95063113617899,9.95063113617899,9.931050331915193,9.916982995696745,2.704553977997201
qqHH_CV_1_C2V_1_kl_0_HHbbVV,0.05626247636667933,0.05626247636667933,0.05626247636667933,0.05592359011921978,0.05559233902875846,0.017972405932963762
QCD,900603.372757249,1219440.0804636534,1219440.0804636534,1208187.047601593,1199914.6854996895,968194.6958131382
TT,76537.02908224265,76537.02908224265,76537.02908224265,76036.67089183381,75504.25522211792,59756.04620421225
ST,6617.229257123121,6617.229257123121,6617.229257123121,6568.071623368754,6530.558860743446,5277.663381085969
W+Jets,12421.273275458932,12421.273275458932,12421.273275458932,12341.31450378544,12265.115729916284,10267.98041055657
Z+Jets,14717.02364266734,14717.02364266734,14717.02364266734,14601.426751424733,14517.101697865306,8391.373012782422
Diboson,703.6912877316817,703.6912877316817,703.6912877316817,699.1389217855917,695.3097641927543,427.8095451515573
ggFHbb,138.81856362723408,138.81856362723408,138.81856362723408,138.12124620347993,137.14696515108136,45.30216339603112
VBFHbb,40.60062332311162,40.60062332311162,40.60062332311162,40.47235839402518,40.3369080970762,12.910667691672323
ZHbb,38.56092725636813,38.56092725636813,38.56092725636813,38.30734801182328,38.14508043229702,11.799771340315372
WHbb,66.232275837835,66.232275837835,66.232275837835,65.79568909543664,65.5789510146088,22.935801367330377
ggZHbb,5.921431593700421,5.921431593700421,5.921431593700421,5.88900772329133,5.855043713430598,2.3432337853521314
ttHbb,187.9810602386041,187.9810602386041,187.9810602386041,186.85674951555848,185.63841488932138,116.0286360102471
HWW,97.55810924622651,97.55810924622651,97.55810924622651,97.00857794271565,96.26976864081334,77.25306730184919
Data,1331012.0,1331012.0,1331012.0,1331012.0,1331012.0,1100458.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
,Pre-selection,QCD SF,LP SF,bbFatJetPt_JER_down >= 300,VVFatJetPt_JER_down >= 300,0.8 ≤ bbFatJetParticleNetMD_Txbb < 0.9737,bbFatJetPt >= 300,VVFatJetPt >= 300
HHbbVV,1.108127280079604,1.108127280079604,1.108127280079604,1.1007654125568072,1.0943466848718584,0.3689838386278276,,
ggHH_kl_2p45_kt_1_HHbbVV,0.8015758690220187,0.8015758690220187,0.8015758690220187,0.7966839978476782,0.7929702737652506,0.27643350911904124,,
ggHH_kl_5_kt_1_HHbbVV,0.8397792227107982,0.8397792227107982,0.8397792227107982,0.834916579637446,0.8313931208750229,0.3735844185203877,,
ggHH_kl_0_kt_1_HHbbVV,1.4337517442784347,1.4337517442784347,1.4337517442784347,1.422734538030712,1.413057483248668,0.48502545101830774,,
VBFHHbbVV,0.02157241124523907,0.02157241124523907,0.02157241124523907,0.021438601541618647,0.02130944961532647,0.00768597256859618,,
qqHH_CV_1_C2V_0_kl_1_HHbbVV,11.015595087816134,11.015595087816134,11.015595087816134,10.993916278669817,10.975351227637582,3.000222893686523,,
qqHH_CV_1p5_C2V_1_kl_1_HHbbVV,18.523340727906472,18.523340727906472,18.523340727906472,18.47778093467595,18.44445480245551,5.093988828230345,,
qqHH_CV_1_C2V_1_kl_2_HHbbVV,0.021501518954477026,0.021501518954477026,0.021501518954477026,0.02138682863811925,0.021273317169623898,0.010382398039568122,,
qqHH_CV_1_C2V_2_kl_1_HHbbVV,9.95063113617899,9.95063113617899,9.95063113617899,9.93227980894857,9.91773923265224,2.705069151260465,,
qqHH_CV_1_C2V_1_kl_0_HHbbVV,0.05626247636667933,0.05626247636667933,0.05626247636667933,0.05595921964896841,0.05560877654128435,0.017990259411435415,,
QCD,900603.372757249,1219440.0804636534,1219440.0804636534,1208182.4786762223,1200009.3644063028,968115.9810780144,,
TT,76537.02908224265,76537.02908224265,76537.02908224265,76005.7888408999,75436.82978583084,59698.85476465417,,
ST,6617.229257123121,6617.229257123121,6617.229257123121,6566.328361234981,6530.907835704748,5277.757625123997,,
W+Jets,12421.273275458932,12421.273275458932,12421.273275458932,12339.320639263746,12260.495964100859,10264.915822931636,,
Z+Jets,14717.02364266734,14717.02364266734,14717.02364266734,14597.661223947085,14510.332852864116,8387.14164328013,,
Diboson,703.6912877316817,703.6912877316817,703.6912877316817,698.579291598942,694.5916383161529,427.30679500407336,,
ggFHbb,138.81856362723408,138.81856362723408,138.81856362723408,138.15554634923888,137.17501694965563,45.32742148315782,,
VBFHbb,40.60062332311162,40.60062332311162,40.60062332311162,40.49535934358885,40.33863807551278,12.901762241315136,,
ZHbb,38.56092725636813,38.56092725636813,38.56092725636813,38.30942955891692,38.14570716432015,11.798125796489272,,
WHbb,66.232275837835,66.232275837835,66.232275837835,65.79959420574862,65.60151072204596,22.93473446572348,,
ggZHbb,5.921431593700421,5.921431593700421,5.921431593700421,5.889087636165095,5.8513654951795635,2.3421762080813324,,
ttHbb,187.9810602386041,187.9810602386041,187.9810602386041,186.82068755865188,185.52754103860983,115.92756418020977,,
HWW,97.55810924622651,97.55810924622651,97.55810924622651,96.95233954850245,96.16828112430852,77.18297347550448,,
Data,1331012.0,1331012.0,1331012.0,,,1100458.0,1331012.0,1331012.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
,Pre-selection,QCD SF,LP SF,bbFatJetPt_JER_up >= 300,VVFatJetPt_JER_up >= 300,0.8 ≤ bbFatJetParticleNetMD_Txbb < 0.9737,bbFatJetPt >= 300,VVFatJetPt >= 300
HHbbVV,1.108127280079604,1.108127280079604,1.108127280079604,1.1000283455508442,1.093563222944563,0.36881957801286125,,
ggHH_kl_2p45_kt_1_HHbbVV,0.8015758690220187,0.8015758690220187,0.8015758690220187,0.7964922393403449,0.7928382739676629,0.27643335964798127,,
ggHH_kl_5_kt_1_HHbbVV,0.8397792227107982,0.8397792227107982,0.8397792227107982,0.835058321937548,0.8316139992092635,0.37364012018238557,,
ggHH_kl_0_kt_1_HHbbVV,1.4337517442784347,1.4337517442784347,1.4337517442784347,1.4228120731930771,1.4130533265554628,0.4854211537809283,,
VBFHHbbVV,0.02157241124523907,0.02157241124523907,0.02157241124523907,0.021440944541920692,0.021327016264919646,0.007706054195496111,,
qqHH_CV_1_C2V_0_kl_1_HHbbVV,11.015595087816134,11.015595087816134,11.015595087816134,10.991439674710634,10.972925334791851,2.999259952008066,,
qqHH_CV_1p5_C2V_1_kl_1_HHbbVV,18.523340727906472,18.523340727906472,18.523340727906472,18.47629524681861,18.443564870299006,5.0935178853446885,,
qqHH_CV_1_C2V_1_kl_2_HHbbVV,0.021501518954477026,0.021501518954477026,0.021501518954477026,0.02138645978059985,0.021286140659751034,0.010394625507128875,,
qqHH_CV_1_C2V_2_kl_1_HHbbVV,9.95063113617899,9.95063113617899,9.95063113617899,9.930464059707752,9.916398819305595,2.7044320235364823,,
qqHH_CV_1_C2V_1_kl_0_HHbbVV,0.05626247636667933,0.05626247636667933,0.05626247636667933,0.05588556143177802,0.05553878736217713,0.017954838124328118,,
QCD,900603.372757249,1219440.0804636534,1219440.0804636534,1208523.5722318934,1200033.7819080197,968333.4816015975,,
TT,76537.02908224265,76537.02908224265,76537.02908224265,76057.90175062326,75546.39913344698,59791.400372422875,,
ST,6617.229257123121,6617.229257123121,6617.229257123121,6569.594060609604,6535.389534232395,5281.431876922224,,
W+Jets,12421.273275458932,12421.273275458932,12421.273275458932,12341.825146611021,12264.924202464117,10268.312380642396,,
Z+Jets,14717.02364266734,14717.02364266734,14717.02364266734,14601.95480697945,14517.83317032325,8391.421257069986,,
Diboson,703.6912877316817,703.6912877316817,703.6912877316817,699.274850691623,695.7930322677172,427.8893356031577,,
ggFHbb,138.81856362723408,138.81856362723408,138.81856362723408,138.1359310079184,137.16716893739732,45.29711139715256,,
VBFHbb,40.60062332311162,40.60062332311162,40.60062332311162,40.474722810632116,40.36086441370259,12.918779347138004,,
ZHbb,38.56092725636813,38.56092725636813,38.56092725636813,38.29663889299799,38.13726355471172,11.809667152238447,,
WHbb,66.232275837835,66.232275837835,66.232275837835,65.79172252987414,65.57398960849181,22.928034541851552,,
ggZHbb,5.921431593700421,5.921431593700421,5.921431593700421,5.887904683527691,5.855217962847878,2.3434904878624687,,
ttHbb,187.9810602386041,187.9810602386041,187.9810602386041,186.88290996039103,185.72296875559454,116.09877758960441,,
HWW,97.55810924622651,97.55810924622651,97.55810924622651,97.04385400611787,96.33153606063196,77.29816288915787,,
Data,1331012.0,1331012.0,1331012.0,,,1100458.0,1331012.0,1331012.0
Loading

0 comments on commit c0e56a1

Please sign in to comment.