diff --git a/src/HHbbVV/postprocessing/PostProcessRes.ipynb b/src/HHbbVV/postprocessing/PostProcessRes.ipynb index 0fd23974..b1c03d61 100644 --- a/src/HHbbVV/postprocessing/PostProcessRes.ipynb +++ b/src/HHbbVV/postprocessing/PostProcessRes.ipynb @@ -2,21 +2,9 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, - "outputs": [ - { - "ename": "ImportError", - "evalue": "cannot import name 'new_filters' from 'postprocessing' (/Users/raghav/Documents/CERN/hhbbww/HHbbVV/src/HHbbVV/postprocessing/postprocessing.py)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 22\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CUT_MAX_VAL, ShapeVar\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mHHbbVV\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mhh_vars\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 8\u001b[0m years,\n\u001b[1;32m 9\u001b[0m data_key,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 20\u001b[0m LUMI,\n\u001b[1;32m 21\u001b[0m )\n\u001b[0;32m---> 22\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpostprocessing\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m res_shape_vars, new_filters, old_filters\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcollections\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m OrderedDict\n\u001b[1;32m 26\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n", - "\u001b[0;31mImportError\u001b[0m: cannot import name 'new_filters' from 'postprocessing' (/Users/raghav/Documents/CERN/hhbbww/HHbbVV/src/HHbbVV/postprocessing/postprocessing.py)" - ] - } - ], + "outputs": [], "source": [ "import utils\n", "import plotting\n", @@ -39,7 +27,7 @@ " jmsr_shifts,\n", " LUMI,\n", ")\n", - "from postprocessing import res_shape_vars, new_filters, old_filters\n", + "from postprocessing import res_shape_vars, load_filters\n", "\n", "from collections import OrderedDict\n", "\n", @@ -58,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -68,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -84,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -98,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -110,7 +98,7 @@ "# samples_dir = \"/eos/uscms/store/user/rkansal/bbVV/skimmer/Feb24\"\n", "# nonres_signal_samples_dir = \"/eos/uscms/store/user/cmantill/bbVV/skimmer/Jun10/\"\n", "# res_signal_samples_dir = \"/eos/uscms/store/user/rkansal/bbVV/skimmer/Apr11/\"\n", - "year = \"2016APV\"\n", + "year = \"2016\"\n", "\n", "date = \"24Mar6\"\n", "plot_dir = MAIN_DIR / f\"plots/PostProcessing/{date}/\"\n", @@ -134,9 +122,103 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded GluGluToHHTobbVV_node_cHHH1 : 100561 entries\n", + "Loaded VBF_HHTobbVV_CV_1_C2V_1_C3_1 : 9678 entries\n", + "Loaded NMSSM_XToYHTo2W2BTo4Q2B_MX-900_MY-80 : 79014 entries\n", + "Loaded NMSSM_XToYHTo2W2BTo4Q2B_MX-1200_MY-190 : 119555 entries\n", + "Loaded NMSSM_XToYHTo2W2BTo4Q2B_MX-2000_MY-125 : 154938 entries\n", + "Loaded NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-250 : 166706 entries\n", + "Loaded NMSSM_XToYHTo2W2BTo4Q2B_MX-4000_MY-150 : 166511 entries\n", + "Loaded QCD_HT300to500 : 8 entries\n", + "Loaded QCD_HT700to1000 : 79891 entries\n", + "Loaded QCD_HT1000to1500 : 54883 entries\n", + "Loaded QCD_HT2000toInf : 29965 entries\n", + "Loaded QCD_HT1500to2000 : 65548 entries\n", + "Loaded QCD_HT500to700 : 6597 entries\n", + "Loaded TTToSemiLeptonic : 563649 entries\n", + "Loaded TTToHadronic : 1012608 entries\n", + "Loaded ST_t-channel_top_4f_InclusiveDecays : 38358 entries\n", + "Loaded ST_tW_top_5f_inclusiveDecays : 8839 entries\n", + "Loaded ST_tW_antitop_5f_inclusiveDecays : 9586 entries\n", + "Loaded ST_t-channel_antitop_4f_InclusiveDecays : 19552 entries\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/raghav/Documents/CERN/hhbbww/HHbbVV/src/HHbbVV/postprocessing/postprocessing.py:913: UserWarning: No events for WJetsToQQ_HT-200to400!\n", + " warnings.warn(f\"No events for {sample}!\", stacklevel=1)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded WJetsToQQ_HT-800toInf : 142130 entries\n", + "Loaded WJetsToQQ_HT-600to800 : 33598 entries\n", + "Loaded WJetsToQQ_HT-400to600 : 293 entries\n", + "Loaded ZJetsToQQ_HT-200to400 : 1 entries\n", + "Loaded ZJetsToQQ_HT-400to600 : 593 entries\n", + "Loaded ZJetsToQQ_HT-600to800 : 51577 entries\n", + "Loaded ZJetsToQQ_HT-800toInf : 148021 entries\n", + "Loaded WW : 1894 entries\n", + "Loaded ZZ : 729 entries\n", + "Loaded WZ : 3146 entries\n", + "Loaded GluGluHToBB : 12018 entries\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/raghav/Documents/CERN/hhbbww/HHbbVV/src/HHbbVV/postprocessing/postprocessing.py:905: UserWarning: No parquet directory for VBFHToBB!\n", + " warnings.warn(f\"No parquet directory for {sample}!\", stacklevel=1)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded ZH_HToBB_ZToQQ : 29277 entries\n", + "Loaded WplusH_HToBB_WToQQ : 16362 entries\n", + "Loaded WminusH_HToBB_WToQQ : 21367 entries\n", + "Loaded ggZH_HToBB_ZToQQ : 22517 entries\n", + "Loaded ttHToBB : 193853 entries\n", + "Loaded VBFHToWWToAny_M-125_TuneCP5_withDipoleRecoil : 72 entries\n", + "Loaded HWminusJ_HToWW_M-125 : 1386 entries\n", + "Loaded GluGluHToWW_Pt-200ToInf_M-125 : 652 entries\n", + "Loaded HWplusJ_HToWW_M-125 : 1841 entries\n", + "Loaded ttHToNonbb_M125 : 52851 entries\n", + "Loaded HZJ_HToWW_M-125 : 10787 entries\n", + "Loaded JetHT_Run2016C_HIPM : 167582 entries\n", + "Loaded JetHT_Run2016D_HIPM : 287156 entries\n", + "Loaded JetHT_Run2016E_HIPM : 273569 entries\n", + "Loaded JetHT_Run2016B_ver2_HIPM : 423991 entries\n", + "Loaded JetHT_Run2016F_HIPM : 178714 entries\n" + ] + }, + { + "ename": "KeyError", + "evalue": "'VBFHbb'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[8], line 15\u001b[0m\n\u001b[1;32m 4\u001b[0m cutflow \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame(\n\u001b[1;32m 5\u001b[0m index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mlist\u001b[39m(samples\u001b[38;5;241m.\u001b[39mkeys()) \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mlist\u001b[39m(nonres_samples\u001b[38;5;241m.\u001b[39mkeys()) \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mlist\u001b[39m(res_samples\u001b[38;5;241m.\u001b[39mkeys())\n\u001b[1;32m 6\u001b[0m )\n\u001b[1;32m 8\u001b[0m events_dict \u001b[38;5;241m=\u001b[39m postprocessing\u001b[38;5;241m.\u001b[39mload_samples(\n\u001b[1;32m 9\u001b[0m samples_dir,\n\u001b[1;32m 10\u001b[0m {\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mnonres_samples, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mres_samples, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39msamples},\n\u001b[1;32m 11\u001b[0m year,\n\u001b[1;32m 12\u001b[0m load_filters,\n\u001b[1;32m 13\u001b[0m )\n\u001b[0;32m---> 15\u001b[0m \u001b[43mutils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd_to_cutflow\u001b[49m\u001b[43m(\u001b[49m\u001b[43mevents_dict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mPreselection\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfinalWeight\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcutflow\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 16\u001b[0m cutflow\n", + "File \u001b[0;32m~/Documents/CERN/hhbbww/HHbbVV/src/HHbbVV/postprocessing/utils.py:298\u001b[0m, in \u001b[0;36madd_to_cutflow\u001b[0;34m(events_dict, key, weight_key, cutflow)\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21madd_to_cutflow\u001b[39m(\n\u001b[1;32m 296\u001b[0m events_dict: \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, pd\u001b[38;5;241m.\u001b[39mDataFrame], key: \u001b[38;5;28mstr\u001b[39m, weight_key: \u001b[38;5;28mstr\u001b[39m, cutflow: pd\u001b[38;5;241m.\u001b[39mDataFrame\n\u001b[1;32m 297\u001b[0m ):\n\u001b[0;32m--> 298\u001b[0m cutflow[key] \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 299\u001b[0m np\u001b[38;5;241m.\u001b[39msum(events_dict[sample][weight_key])\u001b[38;5;241m.\u001b[39msqueeze() \u001b[38;5;28;01mfor\u001b[39;00m sample \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mlist\u001b[39m(cutflow\u001b[38;5;241m.\u001b[39mindex)\n\u001b[1;32m 300\u001b[0m ]\n", + "File \u001b[0;32m~/Documents/CERN/hhbbww/HHbbVV/src/HHbbVV/postprocessing/utils.py:299\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21madd_to_cutflow\u001b[39m(\n\u001b[1;32m 296\u001b[0m events_dict: \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, pd\u001b[38;5;241m.\u001b[39mDataFrame], key: \u001b[38;5;28mstr\u001b[39m, weight_key: \u001b[38;5;28mstr\u001b[39m, cutflow: pd\u001b[38;5;241m.\u001b[39mDataFrame\n\u001b[1;32m 297\u001b[0m ):\n\u001b[1;32m 298\u001b[0m cutflow[key] \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m--> 299\u001b[0m np\u001b[38;5;241m.\u001b[39msum(\u001b[43mevents_dict\u001b[49m\u001b[43m[\u001b[49m\u001b[43msample\u001b[49m\u001b[43m]\u001b[49m[weight_key])\u001b[38;5;241m.\u001b[39msqueeze() \u001b[38;5;28;01mfor\u001b[39;00m sample \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mlist\u001b[39m(cutflow\u001b[38;5;241m.\u001b[39mindex)\n\u001b[1;32m 300\u001b[0m ]\n", + "\u001b[0;31mKeyError\u001b[0m: 'VBFHbb'" + ] + } + ], "source": [ "systematics = {year: {}}\n", "\n", @@ -146,17 +228,10 @@ ")\n", "\n", "events_dict = postprocessing.load_samples(\n", - " sig_samples_dir,\n", - " {**nonres_samples, **res_samples},\n", - " year,\n", - " new_filters,\n", - ")\n", - "\n", - "events_dict |= postprocessing.load_samples(\n", " samples_dir,\n", - " samples,\n", + " {**nonres_samples, **res_samples, **samples},\n", " year,\n", - " new_filters,\n", + " load_filters,\n", ")\n", "\n", "utils.add_to_cutflow(events_dict, \"Preselection\", \"finalWeight\", cutflow)\n", diff --git a/src/HHbbVV/postprocessing/bash_scripts/ControlPlots.sh b/src/HHbbVV/postprocessing/bash_scripts/ControlPlots.sh index c80f3fba..b858e147 100755 --- a/src/HHbbVV/postprocessing/bash_scripts/ControlPlots.sh +++ b/src/HHbbVV/postprocessing/bash_scripts/ControlPlots.sh @@ -17,7 +17,8 @@ MAIN_DIR="../../.." TAG="" resonant="--resonant" -samples="HHbbVV VBFHHbbVV NMSSM_XToYHTo2W2BTo4Q2B_MX-900_MY-80 NMSSM_XToYHTo2W2BTo4Q2B_MX-1200_MY-190 NMSSM_XToYHTo2W2BTo4Q2B_MX-2000_MY-125 NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-250 NMSSM_XToYHTo2W2BTo4Q2B_MX-4000_MY-150" +# samples="HHbbVV VBFHHbbVV NMSSM_XToYHTo2W2BTo4Q2B_MX-900_MY-80 NMSSM_XToYHTo2W2BTo4Q2B_MX-1200_MY-190 NMSSM_XToYHTo2W2BTo4Q2B_MX-2000_MY-125 NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-250 NMSSM_XToYHTo2W2BTo4Q2B_MX-4000_MY-150" +samples="HHbbVV VBFHHbbVV NMSSM_XToYHTo2W2BTo4Q2B_MX-900_MY-80" hem2d="--HEM2d" options=$(getopt -o "" --long "nonresonant,nohem2d,tag:" -- "$@") diff --git a/src/HHbbVV/postprocessing/bash_scripts/MassPlots.sh b/src/HHbbVV/postprocessing/bash_scripts/MassPlots.sh index 15f731d0..062af3ce 100755 --- a/src/HHbbVV/postprocessing/bash_scripts/MassPlots.sh +++ b/src/HHbbVV/postprocessing/bash_scripts/MassPlots.sh @@ -46,11 +46,11 @@ if [[ -z $TAG ]]; then fi # for year in 2016APV 2016 2017 2018 -for year in 2016APV +for year in 2016 do python -u postprocessing.py --control-plots --year $year --resonant \ --data-dir "${MAIN_DIR}/../data/skimmer/24Mar5AllYears" \ --sig-samples $samples \ --plot-dir "${MAIN_DIR}/plots/PostProcessing/$TAG" \ - --no-filters --control-plot-vars "bbFatJetParticleNetMass" "bbFatJetMsd" "VVFatJetParticleNetMass" "VVFatJetMsd" + --mass-plots done diff --git a/src/HHbbVV/postprocessing/postprocessing.py b/src/HHbbVV/postprocessing/postprocessing.py index 4d8457f0..9758c922 100644 --- a/src/HHbbVV/postprocessing/postprocessing.py +++ b/src/HHbbVV/postprocessing/postprocessing.py @@ -94,7 +94,7 @@ class Region: bins=[20, 50, 250], significance_dir="bin", ), - ShapeVar(var="bbFatJetMsd", label=r"$m^{bb}_{msd}$ (GeV)", bins=[20, 0, 300]), + ShapeVar(var="bbFatJetMsd", label=r"$m^{bb}_{msd}$ (GeV)", bins=[20, 50, 250]), ShapeVar(var="bbFatJetParticleNetMD_Txbb", label=r"$T^{bb}_{Xbb}$", bins=[20, 0.8, 1]), ShapeVar(var="VVFatJetEta", label=r"$\eta^{VV}$", bins=[20, -2.4, 2.4]), ShapeVar(var="VVFatJetPt", label=r"$p^{VV}_T$ (GeV)", bins=[20, 300, 2300]), @@ -124,6 +124,15 @@ class Region: ] +# for msd vs mreg comparison plots only +mass_plot_vars = [ + ShapeVar(var="bbFatJetParticleNetMass", label=r"$m^{bb}_{reg}$ (GeV)", bins=[30, 0, 300]), + ShapeVar(var="bbFatJetMsd", label=r"$m^{bb}_{msd}$ (GeV)", bins=[30, 0, 300]), + ShapeVar(var="VVFatJetParticleNetMass", label=r"$m^{VV}_{reg}$ (GeV)", bins=[30, 0, 300]), + ShapeVar(var="VVFatJetMsd", label=r"$m^{VV}_{msd}$ (GeV)", bins=[30, 0, 300]), +] + + def get_nonres_selection_regions( year: str, txbb_wp: str = "MP", @@ -432,24 +441,26 @@ def main(args): # Control plots if args.control_plots: print("\nMaking control plots\n") + plot_vars = mass_plot_vars if args.mass_plots else control_plot_vars if len(args.control_plot_vars): - for var in control_plot_vars.copy(): + for var in plot_vars.copy(): if var.var not in args.control_plot_vars: - control_plot_vars.remove(var) + plot_vars.remove(var) - print("Plotting: ", [var.var for var in control_plot_vars]) + print("Plotting: ", [var.var for var in plot_vars]) control_plots( events_dict, bb_masks, sig_keys, - control_plot_vars, + plot_vars, args.control_plots_dir, args.year, bg_keys=args.bg_keys, sig_scale_dict={"HHbbVV": 1e5, "VBFHHbbVV": 2e6} | {key: 2e4 for key in res_sig_keys}, # sig_splits=sig_splits, HEM2d=args.HEM2d, + same_ylim=args.mass_plots, show=False, ) @@ -910,7 +921,11 @@ def load_samples( # no parquet directory? if not parquet_path.exists(): - warnings.warn(f"No parquet directory for {sample}!", stacklevel=1) + if not ( + (year == "2016" and sample.endswith("HIPM")) + or (year == "2016APV" and not sample.endswith("HIPM")) + ): # don't complain about 2016/HIPM mismatch + warnings.warn(f"No parquet directory for {sample}!", stacklevel=1) continue # print(f"Loading {sample}") @@ -973,7 +988,7 @@ def _load_samples(args, samples, sig_samples, cutflow): print("Samples: ", list(events_dict.keys())) - utils.add_to_cutflow(events_dict, "Pre-selection", "weight", cutflow) + utils.add_to_cutflow(events_dict, "Pre-selection", "finalWeight", cutflow) print("") # print weighted sample yields @@ -1406,6 +1421,7 @@ def control_plots( combine_pdf: bool = True, HEM2d: bool = False, plot_significance: bool = False, + same_ylim: bool = False, show: bool = False, log: tuple[bool, str] = "both", ): @@ -1416,6 +1432,9 @@ def control_plots( control_plot_vars (Dict[str, Tuple]): Dictionary of variables to plot, formatted as {var1: ([num bins, min, max], label), var2...}. sig_splits: split up signals into different plots (in case there are too many for one) + HEM2d: whether to plot 2D hists of FatJet phi vs eta for bb and VV jets as a check for HEM cleaning. + plot_significance: whether to plot the significance as well as the ratio plot. + same_ylim: whether to use the same y-axis limits for all plots. log: True or False if plot on log scale or not - or "both" if both. """ @@ -1440,6 +1459,8 @@ def control_plots( events_dict, shape_var, bb_masks, weight_key=weight_key, selection=selection ) + ylim = np.max([h.values() for h in hists.values()]) if same_ylim else None + if HEM2d and year == "2018": hists["HEM2d"] = hists_HEM2d(events_dict, bb_masks, weight_key, selection) @@ -1474,7 +1495,7 @@ def control_plots( significance_dir=shape_var.significance_dir, show=show, log=log, - ylim=None if not log else 1e15, + ylim=ylim if not log else 1e15, ) merger_control_plots.append(name) @@ -1946,6 +1967,12 @@ def save_templates( add_bool_arg(parser, "resonant", "for resonant or nonresonant", default=False) add_bool_arg(parser, "vbf", "non-resonant VBF or inclusive", default=False) add_bool_arg(parser, "control-plots", "make control plots", default=False) + add_bool_arg( + parser, + "mass-plots", + "make mass comparison plots (filters will automatically be turned off)", + default=False, + ) add_bool_arg(parser, "bdt-plots", "make bdt sculpting plots", default=False) add_bool_arg(parser, "templates", "save m_bb templates using bdt cut", default=False) add_bool_arg( @@ -2071,4 +2098,8 @@ def save_templates( # can't do HEM cleaning for non-resonant until BDT is re-inferenced args.hem_cleaning = bool(args.resonant or args.vbf) + if args.mass_plots: + args.control_plots = True + args.filters = False + main(args)