Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multiple quantiles for angular_resolution #290

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/changes/290.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Make it possible to pass multiple quantiles to ``pyirf.benchmarks.angular_resolution``, calculating all of them.
If only one quantile is passed, the output is the same as before.
28 changes: 21 additions & 7 deletions pyirf/benchmarks/angular_resolution.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from collections.abc import Sequence
import numpy as np
from astropy.table import QTable
from scipy.stats import norm
Expand All @@ -10,7 +11,8 @@


def angular_resolution(
events, energy_bins,
events,
energy_bins,
energy_type="true",
quantile=ONE_SIGMA_QUANTILE,
):
Expand All @@ -20,6 +22,8 @@ def angular_resolution(
This implementation corresponds to the 68% containment of the angular
distance distribution.

Passing a list of quantiles results in all the quantiles being calculated.

Parameters
----------
events : astropy.table.QTable
Expand All @@ -29,8 +33,8 @@ def angular_resolution(
energy_type: str
Either "true" or "reco" energy.
Default is "true".
quantile : float
Which quantile to use for the angular resolution,
quantile : list(float)
Which quantile(s) to use for the angular resolution,
by default, the containment of the 1-sigma region
of the normal distribution (~68%) is used.

Expand All @@ -52,8 +56,16 @@ def angular_resolution(
result[f"{energy_key}_center"] = 0.5 * (energy_bins[:-1] + energy_bins[1:])
result["n_events"] = 0

key = "angular_resolution"
result[key] = u.Quantity(np.nan, table["theta"].unit)
if not isinstance(quantile, Sequence):
quantile = [quantile]

if len(quantile) < 2:
keys = ["angular_resolution"]
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's somewhat confusing to change the naming of the output key based on the number of quantiles. Just having a single valuye doesn't mean that that value is what should be defined as "angular resolution" (which is often defined as either 68% containment or 90% containment in different cases). Wouldn't it be better to just always call them containment_quantile_XX in all cases? or if you really want to be specific, something like angular_resolution_{value*100:.0f}pct_containment. That way there is no surprise to the user that the column names change.

I guess the issue is that somewhere else, the code looks for a magic column named "angular_resolution", but then it's also better to make that column name configurable, and have it default to the CTAO standard using the more explicit column naming.

else:
keys = [f"containment_quantile_{value * 100:.0f}" for value in quantile]

for key in keys:
result[key] = u.Quantity(np.nan, table["theta"].unit)

# if we get an empty input (no selected events available)
# we return the table filled with NaNs
Expand All @@ -63,6 +75,8 @@ def angular_resolution(
# use groupby operations to calculate the percentile in each bin
by_bin = table[valid].group_by(bin_index[valid])
for bin_idx, group in zip(by_bin.groups.keys, by_bin.groups):
result[key][bin_idx] = np.nanquantile(group["theta"], quantile)
result["n_events"][bin_idx] = len(group)
for key, value in zip(keys, quantile):
result[key][bin_idx] = np.nanquantile(group["theta"], value)
result["n_events"][bin_idx] = len(group)

return result
67 changes: 44 additions & 23 deletions pyirf/benchmarks/tests/test_angular_resolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@
def test_empty_angular_resolution():
from pyirf.benchmarks import angular_resolution

events = QTable({
'true_energy': [] * u.TeV,
'theta': [] * u.deg,
})

table = angular_resolution(
events,
[1, 10, 100] * u.TeV
events = QTable(
{
"true_energy": [] * u.TeV,
"theta": [] * u.deg,
}
)

table = angular_resolution(events, [1, 10, 100] * u.TeV)

assert np.all(np.isnan(table["angular_resolution"]))


@pytest.mark.parametrize("unit", (u.deg, u.rad))
def test_angular_resolution(unit):
from pyirf.benchmarks import angular_resolution
Expand All @@ -32,28 +32,32 @@ def test_angular_resolution(unit):

true_resolution = np.append(np.full(N, TRUE_RES_1), np.full(N, TRUE_RES_2))


rng = np.random.default_rng(0)

events = QTable({
'true_energy': np.concatenate([
[0.5], # below bin 1 to test with underflow
np.full(N - 1, 5.0),
np.full(N - 1, 50.0),
[500], # above bin 2 to test with overflow
]) * u.TeV,
'theta': np.abs(rng.normal(0, true_resolution)) * u.deg
})
events = QTable(
{
"true_energy": np.concatenate(
[
[0.5], # below bin 1 to test with underflow
np.full(N - 1, 5.0),
np.full(N - 1, 50.0),
[500], # above bin 2 to test with overflow
]
)
* u.TeV,
"theta": np.abs(rng.normal(0, true_resolution)) * u.deg,
}
)

events['theta'] = events['theta'].to(unit)
events["theta"] = events["theta"].to(unit)

# add nans to test if nans are ignored
events["true_energy"].value[N // 2] = np.nan
events["true_energy"].value[(2 * N) // 2] = np.nan

bins = [1, 10, 100] * u.TeV
table = angular_resolution(events, bins)
ang_res = table['angular_resolution'].to(u.deg)
ang_res = table["angular_resolution"].to(u.deg)
assert len(ang_res) == 2
assert u.isclose(ang_res[0], TRUE_RES_1 * u.deg, rtol=0.05)
assert u.isclose(ang_res[1], TRUE_RES_2 * u.deg, rtol=0.05)
Expand All @@ -64,9 +68,26 @@ def test_angular_resolution(unit):
# 2 sigma coverage interval
quantile = norm(0, 1).cdf(2) - norm(0, 1).cdf(-2)
table = angular_resolution(events, bins, quantile=quantile)
ang_res = table['angular_resolution'].to(u.deg)

ang_res = table["angular_resolution"].to(u.deg)
assert len(ang_res) == 2

assert u.isclose(ang_res[0], 2 * TRUE_RES_1 * u.deg, rtol=0.05)
assert u.isclose(ang_res[1], 2 * TRUE_RES_2 * u.deg, rtol=0.05)

# 25%, 50%, 90% coverage interval
table = angular_resolution(events, bins, quantile=[0.25, 0.5, 0.9])
cov_25 = table["containment_quantile_25"].to(u.deg)
assert len(cov_25) == 2
assert u.isclose(
cov_25[0], norm(0, TRUE_RES_1).interval(0.25)[1] * u.deg, rtol=0.05
)
assert u.isclose(
cov_25[1], norm(0, TRUE_RES_2).interval(0.25)[1] * u.deg, rtol=0.05
)

cov_50 = table["containment_quantile_50"].to(u.deg)
assert u.isclose(cov_50[0], norm(0, TRUE_RES_1).interval(0.5)[1] * u.deg, rtol=0.05)
assert u.isclose(cov_50[1], norm(0, TRUE_RES_2).interval(0.5)[1] * u.deg, rtol=0.05)

cov_90 = table["containment_quantile_90"].to(u.deg)
assert u.isclose(cov_90[0], norm(0, TRUE_RES_1).interval(0.9)[1] * u.deg, rtol=0.05)
assert u.isclose(cov_90[1], norm(0, TRUE_RES_2).interval(0.9)[1] * u.deg, rtol=0.05)
Loading