Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Interpolate signal notebook #73

Merged
merged 1 commit into from
Aug 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
376 changes: 376 additions & 0 deletions src/HHbbVV/postprocessing/InterpolateSignal.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,376 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pickle, json, gzip\n",
"import numpy as np\n",
"import hist\n",
"from hist import Hist\n",
"\n",
"from typing import Optional, List, Dict\n",
"from copy import copy\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import mplhep as hep\n",
"from matplotlib import colors\n",
"\n",
"from tqdm import tqdm\n",
"\n",
"from pathlib import Path\n",
"import os\n",
"\n",
"from HHbbVV.hh_vars import years, bg_keys\n",
"from HHbbVV.postprocessing import datacardHelpers\n",
"from postprocessing import nonres_shape_vars as shape_vars\n",
"import plotting\n",
"\n",
"plt.rcParams.update({\"font.size\": 16})\n",
"plt.style.use(hep.style.CMS)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plot_dir = Path(\"../../plots/Interpolate/24Aug1\")\n",
"plot_dir.mkdir(parents=True, exist_ok=True)\n",
"\n",
"templates_dir = Path(\"templates/24Apr26NonresBDT995AllSigs\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load and process templates"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"templates_dict: dict[str, dict[str, Hist]] = {}\n",
"\n",
"for year in years:\n",
" with (templates_dir / f\"{year}_templates.pkl\").open(\"rb\") as f:\n",
" templates_dict[year] = datacardHelpers.rem_neg(pickle.load(f))\n",
"\n",
"templates = datacardHelpers.sum_templates(templates_dict, years)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"vbf_keys = [\n",
" \"VBFHHbbVV\",\n",
" \"qqHH_CV_1_C2V_1_kl_0_HHbbVV\",\n",
" \"qqHH_CV_1_C2V_1_kl_2_HHbbVV\",\n",
" \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\",\n",
" \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\",\n",
" \"qqHH_CV_1p5_C2V_1_kl_1_HHbbVV\",\n",
"]\n",
"\n",
"vbf_hists = {}\n",
"\n",
"for key, h in templates.items():\n",
" vbf_hists[key] = []\n",
" for vbf_key in vbf_keys:\n",
" vbf_hists[key].append(h[vbf_key, ...])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Interpolation coefficients"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sympy\n",
"\n",
"csamples = [\n",
" # CV, C2V, kl\n",
" (1.0, 1.0, 1.0),\n",
" (1.0, 1.0, 0.0),\n",
" (1.0, 1.0, 2.0),\n",
" (1.0, 0.0, 1.0),\n",
" (1.0, 2.0, 1.0),\n",
" # (0.5, 1.0, 1.0),\n",
" (1.5, 1.0, 1.0),\n",
"]\n",
"\n",
"M = sympy.Matrix(\n",
" [\n",
" [\n",
" CV**2 * kl**2,\n",
" CV**4,\n",
" C2V**2,\n",
" CV**3 * kl,\n",
" CV * C2V * kl,\n",
" CV**2 * C2V,\n",
" ]\n",
" for i, (CV, C2V, kl) in enumerate(csamples)\n",
" ]\n",
")\n",
"\n",
"# the vector of couplings\n",
"CV, C2V, kl = sympy.symbols(\"CV C2V kl\")\n",
"c = sympy.Matrix(\n",
" [\n",
" [CV**2 * kl**2],\n",
" [CV**4],\n",
" [C2V**2],\n",
" [CV**3 * kl],\n",
" [CV * C2V * kl],\n",
" [CV**2 * C2V],\n",
" ]\n",
")\n",
"\n",
"# the vector of symbolic sample cross sections\n",
"s = sympy.Matrix([[sympy.Symbol(\"xs{}\".format(i))] for i in range(len(csamples))])\n",
"\n",
"# actual computation, i.e., matrix inversion and multiplications with vectors\n",
"M_inv = M.pinv()\n",
"coeffs = c.transpose() * M_inv\n",
"sigma = coeffs * s"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_hist_interp(cv, c2v, Kl, hists):\n",
" sigma_val = sigma.subs({CV: cv, C2V: c2v, kl: Kl})\n",
" counts = []\n",
" errs = []\n",
" for i in range(len(hists[0].values())):\n",
" count = np.array(\n",
" sigma_val.subs(\n",
" {sympy.Symbol(f\"xs{j}\"): hists[j].values()[i] for j in range(len(vbf_keys))}\n",
" )\n",
" )[0][0]\n",
" err = np.array(\n",
" sigma_val.subs(\n",
" {\n",
" sympy.Symbol(f\"xs{j}\"): np.nan_to_num(\n",
" np.sqrt(hists[j].variances()[i]) / hists[j].values()[i]\n",
" )\n",
" for j in range(len(vbf_keys))\n",
" }\n",
" )\n",
" )[0][0]\n",
"\n",
" if count < 1e-12:\n",
" count = 0\n",
"\n",
" counts.append(count)\n",
" errs.append(err)\n",
"\n",
" return np.array(counts), np.array(errs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Add interpolated signals to templates"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"interp_points = np.arange(-1.0, 3.1, 0.1)\n",
"samples = [f\"qqHH_CV_1_C2V_{c:.1f}_kl_1_HHbbVV\" for c in interp_points]\n",
"\n",
"interp_hists = {}\n",
"\n",
"for region in [\"passvbf\", \"passggf\", \"fail\"]:\n",
" print(region)\n",
" h = Hist(\n",
" hist.axis.StrCategory(samples, name=\"Sample\"),\n",
" *templates[\"passvbf\"].axes[1:],\n",
" storage=hist.storage.Weight(),\n",
" )\n",
" for i, c in tqdm(enumerate(interp_points)):\n",
" c_h, c_err = get_hist_interp(1.0, c, 1.0, vbf_hists[region])\n",
" h.values()[i, :] = c_h\n",
" h.variances()[i, :] = (c_err * c_h) ** 2\n",
"\n",
" interp_hists[region] = h"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ctemplates = {}\n",
"\n",
"for region in [\"passvbf\", \"passggf\", \"fail\"]:\n",
" template = templates[region]\n",
" # combined sig + bg samples\n",
" csamples = list(template.axes[0]) + samples\n",
"\n",
" # new hist with all samples\n",
" ctemplate = Hist(\n",
" hist.axis.StrCategory(csamples, name=\"Sample\"),\n",
" *template.axes[1:],\n",
" storage=\"weight\",\n",
" )\n",
"\n",
" # add background hists\n",
" for sample in template.axes[0]:\n",
" sample_key_index = np.where(np.array(list(ctemplate.axes[0])) == sample)[0][0]\n",
" ctemplate.view(flow=True)[sample_key_index, ...] = template[sample, ...].view(flow=True)\n",
"\n",
" # add signal hists\n",
" for sample in samples:\n",
" sample_key_index = np.where(np.array(list(ctemplate.axes[0])) == sample)[0][0]\n",
" ctemplate.view(flow=True)[sample_key_index, ...] = interp_hists[region][sample, ...].view(\n",
" flow=True\n",
" )\n",
"\n",
" ctemplates[region] = ctemplate"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from hist.intervals import poisson_interval\n",
"\n",
"(\n",
" poisson_interval(ctemplates[region][\"Data\", ...].values())\n",
" - ctemplates[region][\"Data\", ...].values()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ctemplates[\"passvbf\"][\"qqHH_CV_1_C2V_0.6_kl_1_HHbbVV\", ...].values()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"selection_regions = {\n",
" \"passvbf\": \"VBF\",\n",
" \"passggf\": \"ggF\",\n",
" # \"fail\": \"Fail\",\n",
"}\n",
"\n",
"ylims = {\"passggf\": 200, \"passvbf\": 100, \"fail\": 7e5}\n",
"\n",
"sig_scale_dict = {\n",
" # \"HHbbVV\": 100,\n",
" # \"VBFHHbbVV\": 2000,\n",
" \"qqHH_CV_1_C2V_1.6_kl_1_HHbbVV\": 1,\n",
" \"qqHH_CV_1_C2V_0.6_kl_1_HHbbVV\": 1,\n",
" \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\": 1,\n",
" \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\": 1,\n",
"}\n",
"\n",
"for region, region_label in selection_regions.items():\n",
" pass_region = region.startswith(\"pass\")\n",
" for i, shape_var in enumerate(shape_vars):\n",
" plot_params = {\n",
" \"hists\": ctemplates[region],\n",
" \"sig_keys\": list(sig_scale_dict.keys()),\n",
" \"bg_keys\": [],\n",
" \"sig_scale_dict\": sig_scale_dict if pass_region else None,\n",
" \"show\": True,\n",
" \"year\": \"all\",\n",
" \"ylim\": ylims[region],\n",
" \"title\": f\"Pre-fit {region_label} Region\",\n",
" \"name\": f\"{plot_dir}/interp_{region}_{shape_var.var}_signal_log.pdf\",\n",
" \"ncol\": 2, # if region == \"passvbf\" else 1,\n",
" \"ratio_ylims\": [0, 5] if region == \"passvbf\" else [0, 2],\n",
" \"cmslabel\": \"Preliminary\",\n",
" \"plot_data\": False,\n",
" \"log\": True,\n",
" }\n",
"\n",
" plotting.ratioHistPlot(**plot_params, data_err=True)\n",
"\n",
"# break\n",
"# break"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "python310",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading
Loading