From f909c39628ca0745d2ae826681f9a19222337b31 Mon Sep 17 00:00:00 2001 From: rkansal47 Date: Thu, 1 Aug 2024 12:19:36 +0530 Subject: [PATCH] inteprolate signal --- .../postprocessing/InterpolateSignal.ipynb | 376 ++++++++++++++++++ src/HHbbVV/postprocessing/plotting.py | 21 +- 2 files changed, 387 insertions(+), 10 deletions(-) create mode 100644 src/HHbbVV/postprocessing/InterpolateSignal.ipynb diff --git a/src/HHbbVV/postprocessing/InterpolateSignal.ipynb b/src/HHbbVV/postprocessing/InterpolateSignal.ipynb new file mode 100644 index 00000000..43f20f6f --- /dev/null +++ b/src/HHbbVV/postprocessing/InterpolateSignal.ipynb @@ -0,0 +1,376 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle, json, gzip\n", + "import numpy as np\n", + "import hist\n", + "from hist import Hist\n", + "\n", + "from typing import Optional, List, Dict\n", + "from copy import copy\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import mplhep as hep\n", + "from matplotlib import colors\n", + "\n", + "from tqdm import tqdm\n", + "\n", + "from pathlib import Path\n", + "import os\n", + "\n", + "from HHbbVV.hh_vars import years, bg_keys\n", + "from HHbbVV.postprocessing import datacardHelpers\n", + "from postprocessing import nonres_shape_vars as shape_vars\n", + "import plotting\n", + "\n", + "plt.rcParams.update({\"font.size\": 16})\n", + "plt.style.use(hep.style.CMS)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_dir = Path(\"../../plots/Interpolate/24Aug1\")\n", + "plot_dir.mkdir(parents=True, exist_ok=True)\n", + "\n", + "templates_dir = Path(\"templates/24Apr26NonresBDT995AllSigs\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load and process templates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "templates_dict: dict[str, dict[str, Hist]] = {}\n", + "\n", + "for year in years:\n", + " with (templates_dir / f\"{year}_templates.pkl\").open(\"rb\") as f:\n", + " templates_dict[year] = datacardHelpers.rem_neg(pickle.load(f))\n", + "\n", + "templates = datacardHelpers.sum_templates(templates_dict, years)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbf_keys = [\n", + " \"VBFHHbbVV\",\n", + " \"qqHH_CV_1_C2V_1_kl_0_HHbbVV\",\n", + " \"qqHH_CV_1_C2V_1_kl_2_HHbbVV\",\n", + " \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\",\n", + " \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\",\n", + " \"qqHH_CV_1p5_C2V_1_kl_1_HHbbVV\",\n", + "]\n", + "\n", + "vbf_hists = {}\n", + "\n", + "for key, h in templates.items():\n", + " vbf_hists[key] = []\n", + " for vbf_key in vbf_keys:\n", + " vbf_hists[key].append(h[vbf_key, ...])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Interpolation coefficients" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sympy\n", + "\n", + "csamples = [\n", + " # CV, C2V, kl\n", + " (1.0, 1.0, 1.0),\n", + " (1.0, 1.0, 0.0),\n", + " (1.0, 1.0, 2.0),\n", + " (1.0, 0.0, 1.0),\n", + " (1.0, 2.0, 1.0),\n", + " # (0.5, 1.0, 1.0),\n", + " (1.5, 1.0, 1.0),\n", + "]\n", + "\n", + "M = sympy.Matrix(\n", + " [\n", + " [\n", + " CV**2 * kl**2,\n", + " CV**4,\n", + " C2V**2,\n", + " CV**3 * kl,\n", + " CV * C2V * kl,\n", + " CV**2 * C2V,\n", + " ]\n", + " for i, (CV, C2V, kl) in enumerate(csamples)\n", + " ]\n", + ")\n", + "\n", + "# the vector of couplings\n", + "CV, C2V, kl = sympy.symbols(\"CV C2V kl\")\n", + "c = sympy.Matrix(\n", + " [\n", + " [CV**2 * kl**2],\n", + " [CV**4],\n", + " [C2V**2],\n", + " [CV**3 * kl],\n", + " [CV * C2V * kl],\n", + " [CV**2 * C2V],\n", + " ]\n", + ")\n", + "\n", + "# the vector of symbolic sample cross sections\n", + "s = sympy.Matrix([[sympy.Symbol(\"xs{}\".format(i))] for i in range(len(csamples))])\n", + "\n", + "# actual computation, i.e., matrix inversion and multiplications with vectors\n", + "M_inv = M.pinv()\n", + "coeffs = c.transpose() * M_inv\n", + "sigma = coeffs * s" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_hist_interp(cv, c2v, Kl, hists):\n", + " sigma_val = sigma.subs({CV: cv, C2V: c2v, kl: Kl})\n", + " counts = []\n", + " errs = []\n", + " for i in range(len(hists[0].values())):\n", + " count = np.array(\n", + " sigma_val.subs(\n", + " {sympy.Symbol(f\"xs{j}\"): hists[j].values()[i] for j in range(len(vbf_keys))}\n", + " )\n", + " )[0][0]\n", + " err = np.array(\n", + " sigma_val.subs(\n", + " {\n", + " sympy.Symbol(f\"xs{j}\"): np.nan_to_num(\n", + " np.sqrt(hists[j].variances()[i]) / hists[j].values()[i]\n", + " )\n", + " for j in range(len(vbf_keys))\n", + " }\n", + " )\n", + " )[0][0]\n", + "\n", + " if count < 1e-12:\n", + " count = 0\n", + "\n", + " counts.append(count)\n", + " errs.append(err)\n", + "\n", + " return np.array(counts), np.array(errs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Add interpolated signals to templates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "interp_points = np.arange(-1.0, 3.1, 0.1)\n", + "samples = [f\"qqHH_CV_1_C2V_{c:.1f}_kl_1_HHbbVV\" for c in interp_points]\n", + "\n", + "interp_hists = {}\n", + "\n", + "for region in [\"passvbf\", \"passggf\", \"fail\"]:\n", + " print(region)\n", + " h = Hist(\n", + " hist.axis.StrCategory(samples, name=\"Sample\"),\n", + " *templates[\"passvbf\"].axes[1:],\n", + " storage=hist.storage.Weight(),\n", + " )\n", + " for i, c in tqdm(enumerate(interp_points)):\n", + " c_h, c_err = get_hist_interp(1.0, c, 1.0, vbf_hists[region])\n", + " h.values()[i, :] = c_h\n", + " h.variances()[i, :] = (c_err * c_h) ** 2\n", + "\n", + " interp_hists[region] = h" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ctemplates = {}\n", + "\n", + "for region in [\"passvbf\", \"passggf\", \"fail\"]:\n", + " template = templates[region]\n", + " # combined sig + bg samples\n", + " csamples = list(template.axes[0]) + samples\n", + "\n", + " # new hist with all samples\n", + " ctemplate = Hist(\n", + " hist.axis.StrCategory(csamples, name=\"Sample\"),\n", + " *template.axes[1:],\n", + " storage=\"weight\",\n", + " )\n", + "\n", + " # add background hists\n", + " for sample in template.axes[0]:\n", + " sample_key_index = np.where(np.array(list(ctemplate.axes[0])) == sample)[0][0]\n", + " ctemplate.view(flow=True)[sample_key_index, ...] = template[sample, ...].view(flow=True)\n", + "\n", + " # add signal hists\n", + " for sample in samples:\n", + " sample_key_index = np.where(np.array(list(ctemplate.axes[0])) == sample)[0][0]\n", + " ctemplate.view(flow=True)[sample_key_index, ...] = interp_hists[region][sample, ...].view(\n", + " flow=True\n", + " )\n", + "\n", + " ctemplates[region] = ctemplate" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from hist.intervals import poisson_interval\n", + "\n", + "(\n", + " poisson_interval(ctemplates[region][\"Data\", ...].values())\n", + " - ctemplates[region][\"Data\", ...].values()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ctemplates[\"passvbf\"][\"qqHH_CV_1_C2V_0.6_kl_1_HHbbVV\", ...].values()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "selection_regions = {\n", + " \"passvbf\": \"VBF\",\n", + " \"passggf\": \"ggF\",\n", + " # \"fail\": \"Fail\",\n", + "}\n", + "\n", + "ylims = {\"passggf\": 200, \"passvbf\": 100, \"fail\": 7e5}\n", + "\n", + "sig_scale_dict = {\n", + " # \"HHbbVV\": 100,\n", + " # \"VBFHHbbVV\": 2000,\n", + " \"qqHH_CV_1_C2V_1.6_kl_1_HHbbVV\": 1,\n", + " \"qqHH_CV_1_C2V_0.6_kl_1_HHbbVV\": 1,\n", + " \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\": 1,\n", + " \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\": 1,\n", + "}\n", + "\n", + "for region, region_label in selection_regions.items():\n", + " pass_region = region.startswith(\"pass\")\n", + " for i, shape_var in enumerate(shape_vars):\n", + " plot_params = {\n", + " \"hists\": ctemplates[region],\n", + " \"sig_keys\": list(sig_scale_dict.keys()),\n", + " \"bg_keys\": [],\n", + " \"sig_scale_dict\": sig_scale_dict if pass_region else None,\n", + " \"show\": True,\n", + " \"year\": \"all\",\n", + " \"ylim\": ylims[region],\n", + " \"title\": f\"Pre-fit {region_label} Region\",\n", + " \"name\": f\"{plot_dir}/interp_{region}_{shape_var.var}_signal_log.pdf\",\n", + " \"ncol\": 2, # if region == \"passvbf\" else 1,\n", + " \"ratio_ylims\": [0, 5] if region == \"passvbf\" else [0, 2],\n", + " \"cmslabel\": \"Preliminary\",\n", + " \"plot_data\": False,\n", + " \"log\": True,\n", + " }\n", + "\n", + " plotting.ratioHistPlot(**plot_params, data_err=True)\n", + "\n", + "# break\n", + "# break" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python310", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/HHbbVV/postprocessing/plotting.py b/src/HHbbVV/postprocessing/plotting.py index 20f57791..d6953d7f 100644 --- a/src/HHbbVV/postprocessing/plotting.py +++ b/src/HHbbVV/postprocessing/plotting.py @@ -360,14 +360,15 @@ def ratioHistPlot( ax.set_ylabel(y_label) # background samples - hep.histplot( - [hists[sample, :] for sample in bg_keys], - ax=ax, - histtype="fill", - stack=True, - label=bg_labels, - color=bg_colours, - ) + if len(bg_keys): + hep.histplot( + [hists[sample, :] for sample in bg_keys], + ax=ax, + histtype="fill", + stack=True, + label=bg_labels, + color=bg_colours, + ) # signal samples if len(sig_scale_dict): @@ -482,8 +483,8 @@ def ratioHistPlot( # new: plotting data errors (black lines) and background errors (shaded) separately yerr = np.nan_to_num( np.abs( - poisson_interval(pre_divide_hists[data_key, ...]) - - pre_divide_hists[data_key, ...] + poisson_interval(pre_divide_hists[data_key, ...].values()) + - pre_divide_hists[data_key, ...].values() ) / (bg_tot.values() + 1e-5) )