Merge branch 'main' of https://github.com/rkansal47/HHbbVV
rkansal47 committed May 22, 2024
2 parents 8449160 + 02feab4 commit 013ad1a
Showing 482 changed files with 11,093 additions and 345 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -66,4 +66,4 @@ jobs:
--durations=20
- name: Upload coverage report
uses: codecov/codecov-action@v4.3.0
uses: codecov/codecov-action@v4.4.1
11 changes: 11 additions & 0 deletions README.md
@@ -422,6 +422,17 @@ git clone -b v2.0.0 https://github.com/cms-analysis/CombineHarvester.git Combine
scramv1 b clean; scramv1 b
```

### Packages

To create datacards, you need to use the same cmsenv as above plus these packages:

```bash
pip3 install --upgrade pip
pip3 install rhalphalib
cd /path/to/your/local/HHbbVV/repo
pip3 install -e .
```
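
For a quick sanity check that the install worked, I run something like this (a sketch; it assumes the same cmsenv is active and that the repo installs as the `HHbbVV` package):

```bash
# Confirm both the pip-installed and the editable packages resolve in this environment.
# (Illustrative check only; not part of the datacard workflow itself.)
pip3 show rhalphalib
python3 -c "import rhalphalib, HHbbVV; print('imports OK')"
```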

I also add this to my .bashrc for convenience:

```
4 changes: 1 addition & 3 deletions inference_scans/run_law.sh
@@ -3,7 +3,7 @@

####################################################################################################
# Script for running HH inference 'law' commands
#
#
# Usage:
# 0) snapshot: --snapshot
# 1) limits at point: --limpoint (--vbf) # --vbf runs it for the VBF k2v=0 point
@@ -198,5 +198,3 @@ if [ $impacts = 1 ]; then
--pull-range 3 \
--Snapshot-custom-args="$custom_args"
fi


79 changes: 79 additions & 0 deletions src/HHbbVV/VBF_binder/VBFKinematicsStudyRK.ipynb
@@ -709,6 +709,85 @@
"matching_efficiency(gen_quarks[tsel], sjets)"
]
},
{
"cell_type": "markdown",
"id": "25d84192",
"metadata": {},
"source": [
"Testing more efficient ways of saving the jets"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b7e1dfeb",
"metadata": {},
"outputs": [],
"source": [
"jets = vbf_jets[tsel]\n",
"num_jets = 3\n",
"eta_jj_min = 2\n",
"jets = ak.pad_none(jets, num_jets, clip=True)\n",
"eta = jets.eta\n",
"\n",
"etas = []\n",
"i_s = []\n",
"for i in range(num_jets):\n",
" for j in range(i + 1, num_jets):\n",
" etajj = ak.fill_none(np.abs(eta[:, i] - eta[:, j]) >= eta_jj_min, False)\n",
" etas.append(etajj)\n",
" i_s.append([i, j])\n",
"\n",
"inds = np.zeros((len(jets), 2))\n",
"inds[:, 1] += 1\n",
"\n",
"eta_jj_cache = ~etas[0]\n",
"for n in range(1, len(etas)):\n",
" inds[eta_jj_cache * etas[n]] = i_s[n]\n",
" eta_jj_cache = eta_jj_cache * ~etas[n]\n",
"\n",
"i1 = inds[:, 0].astype(int)\n",
"i2 = inds[:, 1].astype(int)\n",
"\n",
"j1 = jets[np.arange(len(jets)), i1]\n",
"j2 = jets[np.arange(len(jets)), i2]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b3ca6ce0",
"metadata": {},
"outputs": [],
"source": [
"PAD_VAL = -99999\n",
"num_ak4_jets = 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0de25e6c",
"metadata": {},
"outputs": [],
"source": [
"mask = np.zeros((len(jets), len(jets[0])))\n",
"mask[np.arange(len(jets)), i1] = 1\n",
"mask[np.arange(len(jets)), i2] = 1\n",
"jets[ak.Array(mask.astype(bool))]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "330d7265",
"metadata": {},
"outputs": [],
"source": [
"vbf_jets_ak8_etaminjj = jets[ak.Array(mask.astype(bool))]\n",
"np.reshape(ak.fill_none(vbf_jets_ak8_etaminjj.pt, -PAD_VAL).to_numpy(), (-1, num_ak4_jets))"
]
},
{
"cell_type": "markdown",
"id": "bf5a8917",
41 changes: 23 additions & 18 deletions src/HHbbVV/combine/binder/FTest.ipynb
@@ -13,6 +13,8 @@
"import mplhep as hep\n",
"import matplotlib.ticker as mticker\n",
"from pathlib import Path\n",
"from HHbbVV.postprocessing import utils\n",
"from scipy import stats\n",
"\n",
"plt.style.use(hep.style.CMS)\n",
"hep.style.use(\"CMS\")\n",
@@ -29,7 +31,7 @@
"source": [
"MAIN_DIR = Path(\"../../../../\")\n",
"\n",
"plot_dir = MAIN_DIR / \"plots/FTests/24Apr10ggF\"\n",
"plot_dir = MAIN_DIR / \"plots/FTests/24Apr26NonresBDT995\"\n",
"plot_dir.mkdir(exist_ok=True, parents=True)"
]
},
@@ -43,6 +45,13 @@
" return np.mean(toy_ts >= data_ts)\n",
"\n",
"\n",
"def p_value_fit(data_ts: float, df: float):\n",
"\"\"\"get p-value from chi^2 fit rather than toy values\"\"\"\n",
"x = np.linspace(0.01, 100, 1000)\n",
"cdf = stats.chi2.cdf(x, df)\n",
"return 1 - cdf[utils.find_nearest(x, data_ts)]\n",
"\n",
"\n",
"def F_statistic(\n",
" ts_low: List[float],\n",
" ts_high: list[float],\n",
@@ -71,8 +80,9 @@
"metadata": {},
"outputs": [],
"source": [
"eos_cards_dir = \"/eos/uscms/store/user/rkansal/bbVV/cards/f_tests/24Apr10ggFMP9965/\"\n",
"local_cards_dir = \"/uscms/home/rkansal/hhcombine/cards/f_tests/24Apr10ggFMP9965/\"\n",
"eos_cards_dir = \"/eos/uscms/store/user/rkansal/bbVV/cards/f_tests/24Apr26NonresBDT995/ggf/\"\n",
"local_cards_dir = \"/uscms/home/rkansal/hhcombine/cards/f_tests/24Apr26NonresBDT995/ggf/\"\n",
"# test_orders = [0, 1, 2, 3, 4, 5]\n",
"test_orders = [0, 1, 2]\n",
"test_statistics = {}\n",
"\n",
@@ -85,7 +95,7 @@
"\n",
" # test statistics for toys generated by (o1, o2) order model\n",
" file = uproot.concatenate(\n",
" f\"{eos_cards_dir}/nTF_{nTF}/higgsCombineToys{tlabel}Seed*44.GoodnessOfFit.mH125.*44.root\"\n",
" f\"{eos_cards_dir}/nTF_{nTF}/higgsCombineToys{tlabel}Seed*4.GoodnessOfFit.mH125.*4.root\"\n",
" )\n",
" tdict[\"toys\"][tflabel] = np.array(file[\"limit\"])\n",
"\n",
@@ -164,9 +174,6 @@
"metadata": {},
"outputs": [],
"source": [
"from scipy import stats\n",
"\n",
"\n",
"def plot_tests(\n",
" data_ts: float,\n",
" toy_ts: np.ndarray,\n",
@@ -182,6 +189,7 @@
" # plot_min = min(np.min(toy_ts), data_ts, 0)\n",
" plot_min = 0\n",
" pval = p_value(data_ts, toy_ts)\n",
" res = None\n",
"\n",
" plt.figure(figsize=(12, 8))\n",
" h = plt.hist(\n",
@@ -191,14 +199,14 @@
" histtype=\"step\",\n",
" label=f\"{len(toy_ts)} Toys\",\n",
" )\n",
" plt.axvline(data_ts, color=\"#FF502E\", linestyle=\":\", label=rf\"Data ($p$-value = {pval:.2f})\")\n",
"\n",
" if fit is not None:\n",
" x = np.linspace(plot_min + 0.01, plot_max, 100)\n",
"\n",
" if fit == \"chi2\":\n",
" res = stats.fit(stats.chi2, toy_ts, [(0, 200)])\n",
" pdf = stats.chi2.pdf(x, res.params.df)\n",
" pval = p_value_fit(data_ts, res.params.df)\n",
" label = rf\"$\\chi^2_{{DoF = {res.params.df:.2f}}}$ Fit\"\n",
" elif fit == \"f\":\n",
" pdf = stats.f.pdf(x, 1, fdof2)\n",
@@ -215,6 +223,8 @@
" label=label,\n",
" )\n",
"\n",
" plt.axvline(data_ts, color=\"#FF502E\", linestyle=\":\", label=rf\"Data ($p$-value = {pval:.2f})\")\n",
"\n",
" hep.cms.label(\n",
" \"Work in Progress\",\n",
" data=True,\n",
@@ -227,7 +237,9 @@
" plt.ylabel(\"Number of Toys\")\n",
" plt.xlabel(\"Test Statistics\")\n",
"\n",
" plt.savefig(f\"{plot_dir}/{name}.pdf\", bbox_inches=\"tight\")"
" plt.savefig(f\"{plot_dir}/{name}.pdf\", bbox_inches=\"tight\")\n",
"\n",
" return res"
]
},
{
@@ -244,11 +256,11 @@
"metadata": {},
"outputs": [],
"source": [
"o1 = 1 # order being tested\n",
"o1 = 2 # order being tested\n",
"tlabel = f\"{o1}\"\n",
"\n",
"data_ts, toy_ts = test_statistics[tlabel][\"data\"][tlabel], test_statistics[tlabel][\"toys\"][tlabel]\n",
"plot_tests(data_ts, toy_ts, \"gof\" + tlabel, fit=\"chi2\", bins=20)\n",
"res = plot_tests(data_ts, toy_ts, \"gof\" + tlabel, fit=\"chi2\", bins=20)\n",
"\n",
"ord1 = o1 + 1\n",
"tflabel = f\"{ord1}\"\n",
@@ -288,13 +300,6 @@
" plot_tests(data_ts, toy_ts, f\"f{tlabel}_{tflabel}\", title=f\"({o1}, {o2}) vs. ({ord1}, {ord2})\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
2 changes: 1 addition & 1 deletion src/HHbbVV/combine/binder/PlotScan.ipynb
@@ -56,7 +56,7 @@
"# scan_bdt_wps = [0.6, 0.9, 0.96, 0.99, 0.997, 0.998, 0.999]\n",
"\n",
"scan_txbb_wps = [\"MP\", \"HP\"]\n",
"scan_bdt_wps = [0.9, 0.98, 0.995, 0.996, 0.9965, 0.998]\n",
"scan_bdt_wps = [0.9, 0.98, 0.995, 0.9965, 0.998]\n",
"\n",
"scan_lepton_veto = [\"Hbb\"]\n",
"scan_thww_wps = [0.4, 0.6, 0.8, 0.9, 0.94, 0.96, 0.98]\n",
18 changes: 17 additions & 1 deletion src/HHbbVV/combine/run_blinded.sh
@@ -35,6 +35,7 @@
workspace=0
bfit=0
limits=0
toylimits=0
significance=0
dfit=0
dfit_asimov=0
@@ -52,7 +53,7 @@ mintol=0.1 # --cminDefaultMinimizerTolerance
nonresggf=1
nonresvbf=1

options=$(getopt -o "wblsdrgti" --long "workspace,bfit,limits,significance,dfit,dfitasimov,resonant,noggf,novbf,gofdata,goftoys,impactsi,impactsf:,impactsc:,bias:,seed:,numtoys:,mintol:" -- "$@")
options=$(getopt -o "wblsdrgti" --long "workspace,bfit,limits,significance,dfit,dfitasimov,toylimits,resonant,noggf,novbf,gofdata,goftoys,impactsi,impactsf:,impactsc:,bias:,seed:,numtoys:,mintol:" -- "$@")
eval set -- "$options"

while true; do
@@ -66,6 +67,9 @@ while true; do
-l|--limits)
limits=1
;;
--toylimits)
toylimits=1
;;
-s|--significance)
significance=1
;;
@@ -311,6 +315,18 @@ if [ $limits = 1 ]; then
fi


if [ $toylimits = 1 ]; then
echo "Expected limits (MC Unblinded) using toys"
combine -M HybridNew --LHCmode LHC-limits --saveHybridResult -m 125 -n "" -d ${wsm_snapshot}.root --snapshotName MultiDimFit -v 9 \
${unblindedparams},r=0 -s "$seed" --bypassFrequentistFit --rAbsAcc 5.0 -T 100 --clsAcc 10 \
--floatParameters "${freezeparamsblinded},r" --toysFrequentist --expectedFromGrid 0.500 2>&1 | tee $outsdir/ToysLimits.txt

# combine -M HybridNew --LHCmode LHC-limits --singlePoint 0 --saveHybridResult -m 125 -n "" -d ${wsm_snapshot}.root --snapshotName MultiDimFit -v 9 --saveToys \
# ${unblindedparams},r=0 -s "$seed" --bypassFrequentistFit --rAbsAcc 1.0 -T 100 --clsAcc 10 \
# --floatParameters "${freezeparamsblinded},r" --toysFrequentist 2>&1 | tee $outsdir/ToysLimitsSP.txt
fi


if [ $significance = 1 ]; then
echo "Expected significance (MC Unblinded)"
combine -M Significance -d ${wsm_snapshot}.root -n "" --significance -m 125 --snapshotName MultiDimFit -v 9 \
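
For reference, a hedged usage sketch for the new `--toylimits` option, using only flag names visible in the getopt string above (the working directory, flag combination, and seed value are illustrative assumptions):

```bash
# Hypothetical invocation: build the workspace and the background-only snapshot fit
# first, since the HybridNew toy-limits step reads the MultiDimFit snapshot, then
# run the new toy-based expected-limit step.
cd src/HHbbVV/combine
./run_blinded.sh --workspace --bfit --toylimits --seed 42
```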