diff --git a/.github/workflows/continous_integration.yml b/.github/workflows/continous_integration.yml
index f1638b7e..c833d513 100644
--- a/.github/workflows/continous_integration.yml
+++ b/.github/workflows/continous_integration.yml
@@ -26,7 +26,7 @@ jobs:
     # Specify the python versions to test
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11"]
 
     # Steps represent a sequence of tasks that will be executed as part of the job
     steps:
diff --git a/flarestack/cluster/submitter.py b/flarestack/cluster/submitter.py
index 3d1659a3..18667f47 100644
--- a/flarestack/cluster/submitter.py
+++ b/flarestack/cluster/submitter.py
@@ -1,6 +1,15 @@
+import copy
 import json
-import os, subprocess, time, logging, shutil, copy, sys
+import logging
 import numpy as np
+import os
+from pathlib import Path
+import shutil
+import subprocess
+import sys
+import time
+from typing import Optional
+
 from flarestack.shared import (
     fs_dir,
     log_dir,
@@ -15,14 +24,14 @@
 from flarestack.core.minimisation import MinimisationHandler
 from flarestack.core.results import ResultsHandler
 
-from pathlib import Path
-
 logger = logging.getLogger(__name__)
 
 
 class Submitter(object):
-    submitter_dict = dict()
+    # in other modules, this attribute is called 'subclasses';
+    # is it functionally different?
+    submitter_dict: dict[str, object] = dict()
 
     def __init__(
         self,
@@ -302,7 +311,7 @@ class HTCondorSubmitter(Submitter):
     # Log path
     log_path = Path(log_dir)
 
-    override_log_path = None
+    override_log_path: Optional[Path] = None
 
     def __init__(self, *args, **kwargs):
         """
diff --git a/flarestack/core/angular_error_modifier.py b/flarestack/core/angular_error_modifier.py
index 298611ac..63510aed 100644
--- a/flarestack/core/angular_error_modifier.py
+++ b/flarestack/core/angular_error_modifier.py
@@ -34,7 +34,7 @@
 class BaseFloorClass(object):
-    subclasses = {}
+    subclasses: dict[str, object] = {}
 
     def __init__(self, floor_dict):
         self.floor_dict = floor_dict
@@ -149,7 +149,7 @@ def __init__(self, floor_dict):
         if not os.path.isfile(self.pickle_name):
             self.create_pickle()
         else:
-            logger.debug("Loading from".format(self.pickle_name))
+            logger.debug(f"Loading from {self.pickle_name}")
 
         with open(self.pickle_name, "r") as f:
             pickled_data = Pickle.load(f)
@@ -182,6 +182,9 @@ def create_function(self, pickled_array):
         return lambda data, params: np.array([func(params) for _ in data])
 
 
+"""
+# This class has been commented out since its name is redefined below.
+# However, it seems QuantileFloor1D is never explicitly called in the code and is hence likely untested.
 @BaseFloorClass.register_subclass("quantile_floor_1d")
 class QuantileFloor1D(BaseQuantileFloor, BaseStaticFloor):
     def create_pickle(self):
@@ -190,6 +193,7 @@ def create_pickle(self):
     def create_function(self, pickled_array):
         func = interp1d(pickled_array[0], pickled_array[1])
         return lambda data, params: func(data["logE"])
+"""
 
 
 @BaseFloorClass.register_subclass("quantile_floor_1d_e")
@@ -212,7 +216,7 @@ def create_function(self, pickled_array):
 class BaseAngularErrorModifier(object):
-    subclasses = {}
+    subclasses: dict[str, object] = {}
 
     def __init__(self, pull_dict):
         self.season = pull_dict["season"]
@@ -245,7 +249,7 @@ def create(
         e_pdf_dict,
         floor_name="static_floor",
         aem_name="no_modifier",
-        **kwargs
+        **kwargs,
     ):
         pull_dict = dict()
         pull_dict["season"] = season
@@ -335,7 +339,7 @@ def __init__(self, pull_dict):
         if not os.path.isfile(self.pull_name):
             self.create_pickle()
         else:
-            logger.debug("Loading from".format(self.pull_name))
+            logger.debug(f"Loading from {self.pull_name}")
 
         with open(self.pull_name, "r") as f:
             self.pickled_data = Pickle.load(f)
@@ -466,6 +470,9 @@ def create_dynamic(self, pickled_array):
         return lambda data: func(data["logE"], data["sinDec"])
 
 
+"""
+# NOTE: `mypy` flagged some deprecations in the following __main__ block that had never been noticed before, because the block is not part of any testing routine. Before removing the (now commented-out) block altogether, it should be checked whether equivalent code is covered by a unit test.
+
 if __name__ == "__main__":
     from flarestack.data.icecube.ps_tracks.ps_v002_p01 import IC86_1_dict
     from flarestack.analyses.angular_error_floor.plot_bias import (
@@ -530,3 +537,4 @@ def med_pull(data):
 #     for gamma in np.linspace(1.0, 4.0, 4):
 #         print data_loader(IC86_1_dict["exp_path"])["logE"][:8]
 #         print np.degrees(x.f(data_loader(IC86_1_dict["exp_path"])[:8], [gamma]))
+"""
diff --git a/flarestack/core/energy_pdf.py b/flarestack/core/energy_pdf.py
index 9b413cf5..777069ff 100644
--- a/flarestack/core/energy_pdf.py
+++ b/flarestack/core/energy_pdf.py
@@ -63,7 +63,7 @@ class EnergyPDF(object):
     A base class for Energy PDFs.
     """
 
-    subclasses = {}
+    subclasses: dict[str, object] = {}
 
     def __init__(self, e_pdf_dict):
         """
diff --git a/flarestack/core/injector.py b/flarestack/core/injector.py
index f5bac9b9..7e17fed7 100644
--- a/flarestack/core/injector.py
+++ b/flarestack/core/injector.py
@@ -64,7 +64,7 @@ def read_injector_dict(inj_dict):
 class BaseInjector:
     """Base Injector Class"""
 
-    subclasses = {}
+    subclasses: dict[str, object] = {}
 
     def __init__(self, season, sources, **kwargs):
         kwargs = read_injector_dict(kwargs)
@@ -224,7 +224,7 @@ class MCInjector(BaseInjector):
     background. This can be either MC background, or scrambled real data.
     """
 
-    subclasses = {}
+    subclasses: dict[str, object] = {}
 
     def __init__(self, season, sources, **kwargs):
         kwargs = read_injector_dict(kwargs)
diff --git a/flarestack/core/llh.py b/flarestack/core/llh.py
index 4789c2f9..61441c95 100644
--- a/flarestack/core/llh.py
+++ b/flarestack/core/llh.py
@@ -80,7 +80,7 @@ def read_llh_dict(llh_dict):
 class LLH(object):
     """Base class LLH."""
 
-    subclasses = {}
+    subclasses: dict[str, object] = {}
 
     def __init__(self, season, sources, llh_dict):
         self.season = season
@@ -900,7 +900,7 @@ def create_kwargs(self, data, pull_corrector, weight_f=None):
 
         return kwargs
 
-    def calculate_test_statistic(self, params, weights, **kwargs):
+    def calculate_test_statistic(self, params, weights, **kwargs) -> float:
         """Calculates the test statistic, given the parameters.
 
         Uses numexpr for faster calculations.
@@ -920,18 +920,18 @@ def calculate_test_statistic(self, params, weights, **kwargs):
 
         # If n_s if negative, then removes the energy term from the likelihood
 
         for i, n_j in enumerate(all_n_j):
-            SoB_spacetime: list = kwargs["SoB_spacetime_cache"][i]
+            SoB_spacetime_data: list = kwargs["SoB_spacetime_cache"][i]
 
             # Switches off Energy term for negative n_s, which should in theory
             # be a continuous change that does not alter the likelihood for
             # n_s > 0 (as it is not included for n_s=0).
-            if len(SoB_spacetime) == 0:
+            if len(SoB_spacetime_data) == 0:
                 x.append(np.array([1.0]))
             else:
                 SoB_spacetime = kwargs["pull_corrector"].estimate_spatial(
-                    gamma, SoB_spacetime
+                    gamma, SoB_spacetime_data
                 )
 
                 if n_j < 0:
@@ -1394,13 +1394,18 @@ def find_significant_events(self, coincident_data, source):
 
         return FlareLLH(season, sources, llh_dict)
 
 
+"""
+# NOTE: `mypy` flagged some deprecations in the following __main__ block that had never been noticed before, because the block is not part of any testing routine. Before removing the (now commented-out) block altogether, it should be checked whether equivalent code is covered by a unit test.
+
 if __name__ == "__main__":
+
+    from flarestack.shared import fs_scratch_dir
     from scipy.interpolate import InterpolatedUnivariateSpline
 
     g = EnergyPDF.create({"energy_pdf_name": "PowerLaw", "gamma": 2.2})
 
-    e_range = np.logspace(0, 7, 1e3)
+    e_range = np.logspace(start=0, stop=7, num=1000)
 
     f = InterpolatedUnivariateSpline(e_range, np.log(g.f(e_range)))
 
@@ -1440,3 +1445,4 @@ def find_significant_events(self, coincident_data, source):
 
     for i in np.linspace(0.0, 10.0, 21):
         print(i, f([i], weights))
+"""
diff --git a/flarestack/core/minimisation.py b/flarestack/core/minimisation.py
index a739bfe0..2fb41865 100644
--- a/flarestack/core/minimisation.py
+++ b/flarestack/core/minimisation.py
@@ -93,10 +93,10 @@ class MinimisationHandler(object):
     the injector and the likelihood.
     """
 
-    subclasses = {}
+    subclasses: dict[str, object] = {}
 
     # Each MinimisationHandler must specify which LLH classes are compatible
-    compatible_llh = []
+    compatible_llh: list[str] = []
     compatible_negative_n_s = False
 
     def __init__(self, mh_dict):
diff --git a/flarestack/core/multiprocess_wrapper.py b/flarestack/core/multiprocess_wrapper.py
index e0f102f8..96807382 100644
--- a/flarestack/core/multiprocess_wrapper.py
+++ b/flarestack/core/multiprocess_wrapper.py
@@ -40,7 +40,9 @@ def add_injector(self, season, sources):
 
 class MultiProcessor:
     queue = None
-    results = dict()
+
+    # this is not used within the class, maybe it can be safely dropped
+    # results = dict()
 
     def __init__(self, n_cpu, **kwargs):
         self.queue = JoinableQueue()
@@ -149,9 +151,15 @@ def run_multiprocess(n_cpu, mh_dict):
 
     logging.basicConfig(level=logging.INFO)
 
+    # os.cpu_count() may return None; handle that safely
+    cpu_count = os.cpu_count()
+    available_cpus = cpu_count if cpu_count is not None else 1
+    # 1 <= n_cpu <= 32
+    n_cpu = min(max(1, available_cpus - 1), 32)
+
     parser = argparse.ArgumentParser()
     parser.add_argument("-f", "--file", help="Path for analysis pkl_file")
-    parser.add_argument("-n", "--n_cpu", default=min(max(1, os.cpu_count() - 1), 32))
+    parser.add_argument("-n", "--n_cpu", default=n_cpu)
 
     cfg = parser.parse_args()
 
     logger.info(f"N CPU available {os.cpu_count()}. Using {cfg.n_cpu}")
diff --git a/flarestack/core/results.py b/flarestack/core/results.py
index 09c6d0cc..4f5cd658 100644
--- a/flarestack/core/results.py
+++ b/flarestack/core/results.py
@@ -1,12 +1,19 @@
-import os
-import pickle as Pickle
+import logging
 import numpy as np
+import os
+import pickle
+from pydantic import BaseModel
 import scipy
 import scipy.stats
 import matplotlib.cm as cm
 import matplotlib.colors as colors
 import matplotlib.animation as animation
 import matplotlib.pyplot as plt
+from pathlib import Path
+import sys
+from typing import Final, Optional, Tuple
+
+
 from flarestack.shared import (
     name_pickle_output_dir,
     plot_output_dir,
@@ -16,7 +23,10 @@
     flux_to_k,
 )
 from flarestack.core.ts_distributions import (
-    plot_background_ts_distribution,
+    DiscoverySpec,
+    fit_background,
+    calc_ts_threshold,
+    plot_ts_distribution,
     plot_fit_results,
     get_ts_fit_type,
 )
@@ -24,8 +34,7 @@
 from flarestack.core.minimisation import MinimisationHandler
 from flarestack.core.time_pdf import TimePDF
 from flarestack.utils.catalogue_loader import load_catalogue
-import sys
-import logging
+
 
 logger = logging.getLogger(__name__)
 
@@ -34,7 +43,33 @@ class OverfluctuationError(Exception):
     pass
 
 
+class Result(BaseModel):
+    background_median: Optional[float] = None
+    reference_ts: Optional[float] = None
+    reference_ts_overfluctuation_fraction: Optional[float] = None
+
+    sensitivity_value: Optional[float] = None
+    sensitivity_error: Optional[Tuple[float, float]] = None
+    sensitivity_extrapolated: Optional[bool] = False
+
+    discovery_potential_3sigma_threshold: Optional[float] = None
+    discovery_potential_3sigma_value: Optional[float] = None
+    discovery_potential_3sigma_error: Optional[Tuple[float, float]] = None
+
+    discovery_potential_5sigma_threshold: Optional[float] = None
+    discovery_potential_5sigma_value: Optional[float] = None
+    discovery_potential_5sigma_error: Optional[Tuple[float, float]] = None  # currently unused
+    discovery_potential_TS25_value: Optional[float] = None
+    discovery_potential_TS25_error: Optional[Tuple[float, float]] = None
+
+    discovery_potential_extrapolated: Optional[bool] = False
+
+    flux_to_ns: Optional[float] = None
+
+
 class ResultsHandler(object):
+    TS_DISTRIBUTION_SUBDIR: Final[str] = "ts_distributions"
+
     def __init__(self, rh_dict, do_sens=True, do_disc=True, bias_error="std"):
         self.sources = load_catalogue(rh_dict["catalogue"])
 
@@ -42,15 +77,28 @@ def __init__(self, rh_dict, do_sens=True, do_disc=True, bias_error="std"):
         self.mh_name = rh_dict["mh_name"]
         self.scale = rh_dict["scale"]
 
+        # Dictionary with the analysis results.
+        # Loaded by `merge_pickle_data`.
         self.results = dict()
+
         self.pickle_output_dir = name_pickle_output_dir(self.name)
-        self.plot_dir = plot_output_dir(self.name)
+
+        self.plot_dir = Path(plot_output_dir(self.name))
+
+        self.ts_distribution_dir = self.plot_dir / self.TS_DISTRIBUTION_SUBDIR
+
         self.merged_dir = os.path.join(self.pickle_output_dir, "merged")
 
         self.allow_extrapolation = rh_dict.get("allow_extrapolated_sensitivity", True)
 
         self.valid = True
 
+        self.default_overfluctuation_msg = "(Almost) all tested signal injection values result in > 95% of trials above the chosen TS threshold.\n \
+            This makes it impossible to properly interpolate the sensitivity or discovery potential value. \n \
+            Possible causes of this behaviour are:\n \
+            - all the `scale` steps are too high in value;\n \
+            - the `injection_weight_modifier` values of the catalogue are improperly set: check the actual number of injected neutrinos."
+
         # Checks if the code should search for flares. By default, this is
        # not done.
        # self.flare = self.mh_name == "flare"
@@ -82,6 +130,8 @@ def __init__(self, rh_dict, do_sens=True, do_disc=True, bias_error="std"):
         # elif self.negative_n_s:
         #     self.ts_type = "Negative n_s"
         # else:
+
+        # Inspects rh_dict for "mh_name" and determines whether to use flare fitting.
         self.ts_type = get_ts_fit_type(rh_dict)
 
         # print "negative_ns", self.negative_n_s
@@ -93,25 +143,12 @@
         self.bounds = bounds
         self.p0 = p0
 
-        # if cleanup:
-        #     self.clean_merged_data()
-
         # this will have the TS threshold values as keys and a tuple containing
         # (injection scale, relative overfluctuations, error on overfluctuations)
         # as values
         self.overfluctuations = dict()
 
-        self.sensitivity = np.nan
-        self.sensitivity_err = np.nan
-        self.bkg_median = np.nan
-        self.frac_over = np.nan
-        self.disc_potential = np.nan
-        self.disc_err = np.nan
-        self.disc_potential_25 = np.nan
-        self.disc_ts_threshold = np.nan
-        self.extrapolated_sens = False
-        self.extrapolated_disc = False
-        self.flux_to_ns = np.nan
+        self.result = Result()
 
         try:
             # if self.show_inj:
@@ -173,13 +210,20 @@ def __str__(self):
            extrapolated = (
                "extrapolated" if self.extrapolated_sens else "not extrapolated"
            )
-            out += f"Sensitivity = {self.sensitivity:.2e} (+{self.sensitivity_err[0]:.2e}/-{self.sensitivity_err[1]:.2e}) [{extrapolated}]\n"
-            out += f"Discovery potential (5 sigma from TS distribution) = {self.disc_potential:.2e}\n"
-            out += f"Discovery potential (TS = 25) = {self.disc_potential_25:.2e}"
+            out += f"Sensitivity = {self.result.sensitivity_value:.2e} (+{self.result.sensitivity_error[0]:.2e}/-{self.result.sensitivity_error[1]:.2e}) [{extrapolated}]\n"
+            out += f"Discovery potential (5 sigma from TS distribution) = {self.result.discovery_potential_5sigma_value:.2e}\n"
+            out += f"Discovery potential (TS = 25) = {self.result.discovery_potential_TS25_value:.2e}"
        else:
            out += "Result is invalid. Check the log messages."
 
        return out
 
+    def get_background_trials(self) -> dict:
+        try:
+            return self.results[scale_shortener(0.0)]
+        except KeyError:
+            logger.error("No key equal to '0'")
+            return {}
+
     @property
     def scales_float(self):
         """directly return the injected scales as floats"""
@@ -255,8 +299,10 @@ def astro_values(self, e_pdf_dict):
         discovery potential
         """
 
-        astro_sens = self.nu_astronomy(self.sensitivity, e_pdf_dict)
-        astro_disc = self.nu_astronomy(self.disc_potential, e_pdf_dict)
+        astro_sens = self.nu_astronomy(self.result.sensitivity_value, e_pdf_dict)
+        astro_disc = self.nu_astronomy(
+            self.result.discovery_potential_5sigma_value, e_pdf_dict
+        )
 
         return astro_sens, astro_disc
 
@@ -304,7 +350,7 @@ def load_injection_values(self):
             if os.path.isfile(path):
                 try:
                     with open(path, "rb") as f:
-                        inj_values[os.path.splitext(file)[0]] = Pickle.load(f)
+                        inj_values[os.path.splitext(file)[0]] = pickle.load(f)
                 except EOFError as e:
                     logger.warning(f"{path}: EOFError: {e}! Can not use this scale!")
@@ -335,7 +381,7 @@ def merge_pickle_data(self):
         if os.path.isfile(merged_path):
             logger.debug(f"loading merged data from {merged_path}")
             with open(merged_path, "rb") as mp:
-                merged_data = Pickle.load(mp)
+                merged_data = pickle.load(mp)
         else:
             merged_data = {}
 
@@ -344,7 +390,7 @@
 
             try:
                 with open(path, "rb") as f:
-                    data = Pickle.load(f)
+                    data = pickle.load(f)
             except (EOFError, IsADirectoryError):
                 logger.warning("Failed loading: {0}".format(path))
                 continue
@@ -367,7 +413,7 @@
                     raise KeyError(m)
 
         with open(merged_path, "wb") as mp:
-            Pickle.dump(merged_data, mp)
+            pickle.dump(merged_data, mp)
 
         if len(list(merged_data.keys())) > 0:
             self.results[scale_shortener(float(sub_dir_name))] = merged_data
@@ -484,7 +530,7 @@ def find_sensitivity(self):
         """
 
-        try:
-            bkg_dict = self.results[scale_shortener(0.0)]
-        except KeyError:
-            logger.error("No key equal to '0'")
+        bkg_dict = self.get_background_trials()
+        if not bkg_dict:
+            # get_background_trials() has already logged the error
             return
 
@@ -492,18 +538,18 @@
         bkg_ts = bkg_dict["TS"]
 
         bkg_median = np.median(bkg_ts)
-        self.bkg_median = bkg_median
+        self.result.background_median = bkg_median
 
         savepath = os.path.join(self.plot_dir, "sensitivity.pdf")
 
         (
-            self.sensitivity,
-            self.sensitivity_err,
+            self.result.sensitivity_value,
+            self.result.sensitivity_error,
             self.extrapolated_sens,
         ) = self.find_overfluctuations(bkg_median, savepath)
 
         msg = "EXTRAPOLATED " if self.extrapolated_sens else ""
-        logger.info(f"{msg}Sensitivity is {self.sensitivity:.3g}")
+        logger.info(f"{msg}Sensitivity is {self.result.sensitivity_value:.3g}")
 
     # def set_upper_limit(self, ts_val, savepath):
     #     """Set an upper limit, based on a Test Statistic value from
@@ -537,7 +583,7 @@ def find_sensitivity(self):
     #     print "Upper limit is", "{0:.3g}".format(ul)
    #     return ul, extrapolated
 
-    def find_overfluctuations(self, ts_val, savepath=None):
+    def find_overfluctuations(self, ts_val: float, savepath=None) -> tuple:
         """Uses the values of injection trials to fit an 1-exponential decay
         function to the fraction of overfluctuations above `ts_val` as a
         function of the injected flux (or n_s).
@@ -552,14 +598,11 @@
         either case, a plot of the overfluctuations as a function of the
         injected signal is produced.
         """
+        x = self.scales  # shortened scale keys of self.results
 
-        x = sorted(self.results.keys())
-        x_acc = []
-        y = []
-
-        x = [scale_shortener(i) for i in sorted([float(j) for j in x])]
+        x_acc, y, yerr = [], [], []
 
-        yerr = []
+        self.result.reference_ts = ts_val
 
         for scale in x:
             ts_array = np.array(self.results[scale]["TS"])
 
             frac = float(len(ts_array[ts_array > ts_val])) / (float(len(ts_array)))
 
             if scale == scale_shortener(0.0):
-                self.frac_over = frac
+                # Note: this value depends on the chosen ts_val in case of an unblinding.
+                self.result.reference_ts_overfluctuation_fraction = frac
 
             if len(ts_array) > 1:
                 y.append(frac)
@@ -587,9 +631,7 @@
                     f"Fraction of overfluctuations is {frac=}, skipping plot for {scale=}"
                 )
 
         if len(np.where(np.array(y) < 0.95)[0]) < 2:
-            raise OverfluctuationError(
-                f"Not enough points with overfluctuations under 95%, lower injection scale!"
-            )
+            raise OverfluctuationError(self.default_overfluctuation_msg)
 
         x = np.array(x_acc)
         self.overfluctuations[ts_val] = x, y, yerr
@@ -671,107 +713,103 @@ def best_f(x, sd=0.0):
         plt.close()
 
         if len(np.where(np.array(y) < 0.95)[0]) < 2:
-            raise OverfluctuationError(
-                f"Not enough points with overfluctuations under 95%, lower injection scale!"
-            )
+            raise OverfluctuationError(self.default_overfluctuation_msg)
 
         fit_err = np.array([fit - lower, upper - fit]).T[0]
 
         return fit, fit_err, extrapolated
 
-    def find_disc_potential(self):
-        ts_path = os.path.join(self.plot_dir, "ts_distributions/0.pdf")
+    def find_disc_potential(self) -> None:
+        ts_plot_filename: Path = self.ts_distribution_dir / "0.pdf"
 
-        try:
-            bkg_dict = self.results[scale_shortener(0.0)]
-        except KeyError:
-            logger.error("No key equal to '0'")
-            return
+        bkg_trials: dict = self.get_background_trials()
+        if not bkg_trials:
+            # get_background_trials() has already logged the error
+            return
 
-        bkg_ts = bkg_dict["TS"]
+        bkg_ts = np.array(bkg_trials["TS"])
+        bg_fit = fit_background(bkg_ts, self.ts_type)
 
-        disc_threshold = plot_background_ts_distribution(
-            bkg_ts, ts_path, ts_type=self.ts_type
-        )
+        if bg_fit is None:
+            # fit_background has detected that the maximum of the TS is zero, so there is not much to do.
+            return
 
-        self.disc_ts_threshold = disc_threshold
+        # discovery potential is assessed for:
+        # - a given list of significance values (3 sigma, 5 sigma), for which the TS threshold is calculated;
+        # - a nominal 5 sigma significance corresponding to a TS threshold of 25 (from Wilks' theorem)
+        # each of these scenarios is encoded in a DiscoverySpec object
+        SIGNIFICANCE_VALUES: Final[tuple[float, ...]] = (3.0, 5.0)
+        significance_specs: list[DiscoverySpec] = [
+            calc_ts_threshold(bg_fit, significance=significance)
+            for significance in SIGNIFICANCE_VALUES
+        ]
+        significance_specs.append(
+            DiscoverySpec(
+                significance=5.0,
+                positive_cdf_threshold=np.NaN,
+                ts_threshold=25.0,
+                wilks=True,
+            )
+        )
 
-        bkg_median = np.median(bkg_ts)
-        x = sorted(self.results.keys())
-        y = []
-        y_25 = []
+        # TODO: should we check here whether any DiscoverySpec has an invalid TS threshold?
 
-        x = [scale_shortener(i) for i in sorted([float(j) for j in x])]
+        # the plot of the background TS distribution will show thresholds according to the significances
+        plot_ts_distribution(
+            bkg_ts,
+            bg_fit,
+            significances=significance_specs,
+            path=ts_plot_filename,
+        )
 
-        if np.isnan(disc_threshold):
-            logger.warning(
-                f"Invalid discovery threshold {disc_threshold=} will be ingnored. Using TS = 25.0 only."
-            )
+        # dictionary where, for every DiscoverySpec, we will store the overfluctuation fraction as a function of the injection scale
+        overfluctuation_fraction: dict[str, np.ndarray] = {}
+        # dictionary where we will store the discovery potential flux
+        discovery_potential_flux: dict[str, float] = {}
 
-        for scale in x:
-            ts_array = np.array(self.results[scale]["TS"])
+        n_scales = len(self.scales)
 
-            if not np.isnan(disc_threshold):
-                frac = float(len(ts_array[ts_array > disc_threshold])) / (
-                    float(len(ts_array))
-                )
+        for spec in significance_specs:
+            overfluctuation_fraction[spec.name] = np.zeros(shape=n_scales)
 
-                logger.info(
-                    "Fraction of overfluctuations is {0:.2f} above {1:.2f} (N_trials={2}) (Scale={3})".format(
-                        frac, disc_threshold, len(ts_array), scale
-                    )
-                )
-
-                y.append(frac)
-
-            frac_25 = float(len(ts_array[ts_array > 25.0])) / (float(len(ts_array)))
-
-            logger.info(
-                "Fraction of overfluctuations is {0:.2f} above 25 (N_trials={1}) (Scale={2})".format(
-                    frac_25, len(ts_array), scale
-                )
-            )
-
-            y_25.append(frac_25)
+        for i_s, scale in enumerate(self.scales):
+            ts_array = np.array(self.results[scale]["TS"])
 
-            # if frac != 0.0:
-            #     logger.info(f"Making plot for {scale=}, {frac=}")
+            for spec in significance_specs:
+                if not np.isnan(spec.ts_threshold):
+                    # TODO: just assume all specs are good and avoid (re)checking isnan
+                    frac = float(len(ts_array[ts_array > spec.ts_threshold])) / (
+                        float(len(ts_array))
+                    )
+
+                    logger.info(
+                        f"Fraction of overfluctuations is {frac:.2f} above {spec.ts_threshold:.2f} (N_trials={len(ts_array)}) (scale={scale})"
+                    )
+
+                    overfluctuation_fraction[spec.name][i_s] = frac
+
+            # There used to be a safeguard against frac == 0 here.
+            # Also this is independent of the overfluctuation fraction. Can it be decoupled?
             self.make_plots(scale)
-            # else:
-            #     logger.warning(
-            #         f"Fraction of overfluctuations is {frac=}, skipping plot for {scale=}"
-            #     )
-
-        x = np.array([float(s) for s in x])
-
-        x_flux = k_to_flux(x)
-
-        threshold = 0.5
-
-        sols = []
 
         """
-        Calculate the discovery potential based on the 5-sigma threshold of the TS distribution only if the distribution is non-degenerate. Otherwise, use only the TS=25 threshold.
+        Loop on the discovery potential specifications and calculate the corresponding flux by interpolation.
         """
+        discovery_trial_fraction: float = 0.5
 
-        y_list = [y_25]
-        out_list = ["disc_potential_25"]
-
-        if not np.isnan(disc_threshold):
-            y_list.append(y)
-            out_list.append("disc_potential")
+        x = np.array(self.scales_float)
+        x_flux = k_to_flux(x)
 
-        for i, y_val in enumerate(y_list):
+        # gamma CDF used to fit the overfluctuation fraction vs injection scale
+        def f(x, a, b, c):
+            value = scipy.stats.gamma.cdf(x, a=a, loc=b, scale=c)
+            return value
 
-            def f(x, a, b, c):
-                value = scipy.stats.gamma.cdf(x, a, b, c)
-                return value
+        for key in overfluctuation_fraction:
+            # overfluctuation fractions for this spec, one entry per injection scale
+            y_vals: np.ndarray = overfluctuation_fraction[key]
 
             best_f = None
 
             try:
                 res = scipy.optimize.curve_fit(
-                    f, x, y_val, p0=[6, -0.1 * max(x), 0.1 * max(x)]
+                    f, x, y_vals, p0=[6, -0.1 * max(x), 0.1 * max(x)]
                 )
 
                 best_a = res[0][0]
@@ -781,33 +819,37 @@ def f(x, a, b, c):
 
                 def best_f(x):
                     return f(x, best_a, best_b, best_c)
 
-                sol = scipy.stats.gamma.ppf(0.5, best_a, best_b, best_c)
+                sol = scipy.stats.gamma.ppf(
+                    discovery_trial_fraction, best_a, best_b, best_c
+                )
 
-                # "disc_potential" and "disc_potential_25" attributes are set here
-                # use of `setattr` makes the code a bit obscure and could be improved
-                setattr(self, out_list[i], k_to_flux(sol))
+                discovery_potential_flux[key] = k_to_flux(sol)
 
             except RuntimeError as e:
                 logger.warning(f"RuntimeError for discovery potential!: {e}")
 
+            # Plot the overfluctuation fraction and the fitted curve.
             xrange = np.linspace(0.0, 1.1 * max(x), 1000)
 
-            savepath = os.path.join(self.plot_dir, "disc" + ["", "_25"][i] + ".pdf")
+            # output naming scheme derived from the spec name, e.g. "disc_5_sigma.pdf"
+            savepath = os.path.join(self.plot_dir, f"disc_{key.replace(' ', '_')}.pdf")
 
             fig = plt.figure()
             ax1 = fig.add_subplot(111)
 
-            ax1.scatter(x_flux, y_val, color="black")
+            ax1.scatter(x_flux, y_vals, color="black")
 
-            if not isinstance(best_f, type(None)):
+            if best_f is not None:
                 ax1.plot(k_to_flux(xrange), best_f(xrange), color="blue")
 
-            ax1.axhline(threshold, lw=1, color="red", linestyle="--")
-            ax1.axvline(self.sensitivity, lw=2, color="black", linestyle="--")
-            ax1.axvline(self.disc_potential, lw=2, color="red")
+            ax1.axhline(discovery_trial_fraction, lw=1, color="red", linestyle="--")
+            ax1.axvline(
+                self.result.sensitivity_value, lw=2, color="black", linestyle="--"
+            )
+            ax1.axvline(self.result.discovery_potential_5sigma_value, lw=2, color="red")
 
             ax1.set_ylim(0.0, 1.0)
             ax1.set_xlim(0.0, k_to_flux(max(xrange)))
-            ax1.set_ylabel(r"Overfluctuations relative to 5 $\sigma$ Threshold")
-            plt.xlabel(r"Flux Normalisation @ 1GeV [ GeV$^{-1}$ cm$^{-2}$ s$^{-1}$]")
+            ax1.set_ylabel(r"Overfluctuations relative to 5 $\sigma$ threshold")
+            plt.xlabel(r"Flux normalisation @ 1GeV [ GeV$^{-1}$ cm$^{-2}$ s$^{-1}$]")
 
             if not np.isnan(self.flux_to_ns):
                 ax2 = ax1.twiny()
@@ -818,26 +860,33 @@ def best_f(x):
             fig.savefig(savepath)
             plt.close()
 
-        if self.disc_potential > max(x_flux):
-            self.extrapolated_disc = True
+        # we store the output in the result attribute
+        # having these hardcoded keys is a bit ugly
+        self.result.discovery_potential_3sigma_value = discovery_potential_flux[
+            "3 sigma"
+        ]
+        self.result.discovery_potential_5sigma_value = discovery_potential_flux[
+            "5 sigma"
+        ]
+        self.result.discovery_potential_TS25_value = discovery_potential_flux[
+            "5 sigma (Wilks)"
+        ]
 
-        msg = ""
+        if self.result.discovery_potential_5sigma_value > max(x_flux):
+            self.extrapolated_disc = True
 
-        if self.extrapolated_disc:
-            msg = "EXTRAPOLATED "
+        msg = "EXTRAPOLATED " if self.extrapolated_disc else ""
 
         logger.info(
-            "{0}Discovery Potential is {1:.3g}".format(msg, self.disc_potential)
-        )
-        logger.info(
-            "Discovery Potential (TS=25) is {0:.3g}".format(self.disc_potential_25)
+            f"{msg}Discovery potential is {self.result.discovery_potential_5sigma_value:.3g}"
         )
+        logger.info(
+            f"Discovery potential (Wilks, TS = 25) is {self.result.discovery_potential_TS25_value:.3g}"
+        )
 
     def noflare_plots(self, scale):
         ts_array = np.array(self.results[scale]["TS"])
 
         ts_path = os.path.join(self.plot_dir, "ts_distributions/" + str(scale) + ".pdf")
 
-        plot_background_ts_distribution(ts_array, ts_path, ts_type=self.ts_type)
+        bg_fit = fit_background(ts_array, self.ts_type)
+        if bg_fit is not None:
+            specs = [calc_ts_threshold(bg_fit, significance=5.0)]
+            plot_ts_distribution(ts_array, bg_fit, significances=specs, path=Path(ts_path))
 
         param_path = os.path.join(self.plot_dir, "params/" + str(scale) + ".pdf")
 
@@ -922,13 +971,16 @@ def ts_distribution_evolution(self):
         all_scales_floats = [float(sc) for sc in all_scales]
 
         logger.debug("all scales: " + str(all_scales_floats))
-        logger.debug("sensitivity scale: " + str(flux_to_k(self.sensitivity)))
+        logger.debug(
+            "sensitivity scale: " + str(flux_to_k(self.result.sensitivity_value))
+        )
 
         sens_scale = all_scales[
-            all_scales_floats >= np.array(flux_to_k(self.sensitivity))
+            all_scales_floats >= np.array(flux_to_k(self.result.sensitivity_value))
         ][0]
         disc_scale = all_scales[
-            all_scales_floats >= np.array(flux_to_k(self.disc_potential))
+            all_scales_floats
+            >= np.array(flux_to_k(self.result.discovery_potential_5sigma_value))
         ][0]
 
         scales = [all_scales[0], sens_scale, disc_scale]
@@ -968,7 +1020,10 @@ def ts_distribution_evolution(self):
             label="signal: {:.2} signal neutrinos".format(n_s[1]),
         )
         ax.axvline(
-            self.bkg_median, ls="--", label="sensitivity threshold", color="orange"
+            self.result.background_median,
+            ls="--",
+            label="sensitivity threshold",
+            color="orange",
         )
 
         ax.hist(
@@ -1008,7 +1063,7 @@ def ts_distribution_evolution(self):
     #         ts_path = self.plot_dir + source + "/ts_distributions/" + str(
    #             scale) + ".pdf"
    #
-    #         plot_background_ts_distribution(ts_array, ts_path,
+    #         plot_ts_distribution(ts_array, ts_path,
    #                                         ts_type=self.ts_type)
    #
    #         param_path = self.plot_dir + source + "/params/" + str(scale) + \
diff --git a/flarestack/core/spatial_pdf.py b/flarestack/core/spatial_pdf.py
index af774fe2..1f9d2042 100644
--- a/flarestack/core/spatial_pdf.py
+++ b/flarestack/core/spatial_pdf.py
@@ -32,7 +32,7 @@ def __init__(self, spatial_pdf_dict, season):
 class SignalSpatialPDF:
     """Base Signal Spatial PDF class."""
 
-    subclasses = {}
+    subclasses: dict[str, object] = {}
 
     def __init__(self, spatial_pdf_dict):
         pass
@@ -202,7 +202,7 @@ def signal_spatial(source, cut_data):
 class BackgroundSpatialPDF:
-    subclasses = {}
+    subclasses: dict[str, object] = {}
 
     def __init__(self, spatial_pdf_dict, season):
         pass
diff --git a/flarestack/core/time_pdf.py b/flarestack/core/time_pdf.py
index 32ec3df0..30605169 100644
--- a/flarestack/core/time_pdf.py
+++ b/flarestack/core/time_pdf.py
@@ -2,6 +2,7 @@
 import numpy as np
 from scipy.interpolate import interp1d
 
+
 logger = logging.getLogger(__name__)
 
 
@@ -94,8 +95,8 @@ def read_t_pdf_dict(t_pdf_dict):
     return t_pdf_dict
 
 
-class TimePDF(object):
-    subclasses = {}
+class TimePDF:
+    subclasses: dict[str, object] = {}
 
     def __init__(self, t_pdf_dict, livetime_pdf=None):
         self.t_dict = t_pdf_dict
diff --git a/flarestack/core/ts_distributions.py b/flarestack/core/ts_distributions.py
index 68aea864..c5d3da80 100644
--- a/flarestack/core/ts_distributions.py
+++ b/flarestack/core/ts_distributions.py
@@ -1,13 +1,16 @@
+import logging
+import matplotlib.pyplot as plt
 import numpy as np
 import os
-import matplotlib.pyplot as plt
+from pathlib import Path
+from pydantic import BaseModel
 import scipy.optimize, scipy.stats
 from scipy.stats import norm
-import logging
+from typing import Optional
+
 
 logger = logging.getLogger(__name__)
 
-raw_five_sigma = norm.cdf(5)
 n_bins = 100
 
@@ -26,10 +29,15 @@ def get_ts_fit_type(mh_dict):
     return ts_fit_type
 
 
-# Taken via Alex Stasik from Thomas Kintscher
+class Chi2Generic:
+    """Abstract class to be used as base class for custom chi2 functions."""
+
+    _res: scipy.optimize.OptimizeResult
+    _f: scipy.stats._continuous_distns.chi2_gen
 
 
-class Chi2_LeftTruncated(object):
+# Taken via Alex Stasik from Thomas Kintscher
+class Chi2_LeftTruncated(Chi2Generic):
     """A class similar to the ones from scipy.stats
     allowing to fit left-truncated chi^2 distributions.
     """
@@ -113,7 +121,7 @@ def __str__(self):
     )
 
 
-class Chi2_one_side(object):
+class Chi2_one_side(Chi2Generic):
     def __init__(self, data):
         p_start = [2.0, -1.0, 1.0]
         p_start = [1.3]
@@ -137,7 +145,7 @@ def func(p):
         self.sigma = sigma
 
 
-class Chi2_one_side_free(object):
+class Chi2_one_side_free(Chi2Generic):
     def __init__(self, data):
         # p_start = [4., -1., 1.]
         p_start = [2.0, -1.0, 1.0]
@@ -168,44 +176,33 @@ def func(p):
         self.scale = res.x[2]
 
 
-def fit_background_ts(ts_array, ts_type):
-    mask = ts_array > 0.0
-    frac_over = float(len(ts_array[mask])) / (float(len(ts_array)))
-    threshold_err = 0.0
+class BackgroundFit(BaseModel):
+    df: float  # degrees of freedom of the chi2 fit
+    loc: float  # location parameter of the chi2 fit
+    scale: float  # scale parameter of the chi2 fit
+    frac_positive: float  # fraction of positive TS values
+    t_err: float  # uncertainty on the fitted degrees of freedom
-    res = []
-    labs = []
-    cols = []
+    @property
+    def frac_nonpositive(self):
+        return 1 - self.frac_positive
 
-    if np.sum(mask) > 0.0:
-        res.append(ts_array[mask])
-        labs.append("TS > 0")
-        cols.append("black")
-    if np.sum(~mask) > 0.0:
-        res.append(ts_array[~mask])
-        labs.append("TS <= 0")
-        cols.append("grey")
 
-    # if len(res) > 1:
-    #     res = np.array(res, dtype=object)
-    #     res.shape = (2, 1)
+def fit_background_ts(ts_array: np.ndarray, ts_type: str) -> BackgroundFit:
+    """
+    Fit the background TS distribution
+    """
+    mask = ts_array > 0.0
 
-    plt.hist(
-        res,
-        bins=n_bins,
-        lw=2,
-        histtype="step",
-        color=cols,
-        label=labs,
-        density=True,
-        stacked=True,
-    )
+    frac_positive = float(len(ts_array[mask])) / (float(len(ts_array)))
+
+    threshold_err = 0.0
 
     if ts_type == "flare":
-        chi2 = Chi2_LeftTruncated(ts_array)
+        chi2: Chi2Generic = Chi2_LeftTruncated(ts_array)
 
         if chi2._res.success:
-            frac_over = 1.0
+            frac_positive = 1.0
 
             df = chi2._f.args[0]
             loc = chi2._f.args[1]
@@ -244,7 +241,7 @@ def fit_background_ts(ts_array, ts_type):
             scale = 1.0
 
         else:
-            frac_over = 1.0
+            frac_positive = 1.0
 
     elif ts_type in ["standard", "negative_ns"]:
         chi2 = Chi2_one_side(ts_array[ts_array > 0.0])
@@ -255,9 +252,11 @@
         threshold_err = float(chi2.sigma)
 
     else:
-        raise Exception("ts_type " + str(ts_type) + " not recognised!")
+        raise Exception(f"ts_type {ts_type} not recognised!")
 
-    return df, loc, scale, frac_over, threshold_err
+    return BackgroundFit(
+        df=df, loc=loc, scale=scale, frac_positive=frac_positive, t_err=threshold_err
+    )
 
 
 def plot_expanded_negative(ts_array, path):
@@ -268,7 +267,7 @@
         lw=2,
         histtype="step",
         color=["black", "grey"],
-        label=["TS > 0", "TS <= 0"],
+        label=["TS > 0", "TS < 0"],
         density=True,
         stacked=True,
     )
@@ -276,75 +275,184 @@
     med = np.median(ts_array)
 
     plt.yscale("log")
-    plt.xlabel(r"Test Statistic ($\lambda$)")
+    plt.xlabel(r"Test statistic ($\lambda$)")
     plt.legend(loc="upper right")
     plt.savefig(path[:-4] + "_expanded.pdf")
     plt.close()
 
 
-def plot_background_ts_distribution(
-    ts_array, path, ts_type="Standard", ts_val=None, mock_unblind=False
-):
-    try:
-        os.makedirs(os.path.dirname(path))
-    except OSError:
-        pass
+def filter_nan(values: list) -> np.ndarray:
+    arr = np.array(values)
 
-    ts_array = np.array(ts_array)
+    nan_count = np.sum(np.isnan(arr))
 
-    if np.sum(np.isnan(ts_array)) > 0:
-        logger.warning(
-            "TS distribution has", np.sum(np.isnan(ts_array)), "nan entries."
-        )
+    if nan_count > 0:
+        logger.warning(f"TS distribution has {nan_count} NaN entries.")
 
-    ts_array = ts_array[~np.isnan(ts_array)]
+    return arr[~np.isnan(arr)]
 
-    max_ts = np.max(ts_array)
 
-    if np.median(ts_array) < 0.0:
-        plot_expanded_negative(ts_array, path)
+def fit_background(
+    ts_values: np.ndarray, ts_type: str = "standard"
+) -> Optional[BackgroundFit]:
+    """Wrapper around fit_background_ts
+
+    Args:
+        ts_values (np.ndarray): TS values from background trials
+        ts_type (str, optional): Type of test statistic. Defaults to "standard".
+
+    Returns:
+        BackgroundFit: result of the background fit.
+    """
+    ts_arr = filter_nan(ts_values)
+    max_ts = np.max(ts_arr)
 
     if max_ts == 0:
         logger.warning(
-            f"Maximum of TS is {max_ts=}, unable to calculate discovery threshold (too few trials?)"
+            "Maximum of TS is zero, will be unable to calculate any TS threshold (too few trials?)"
         )
-        return np.NaN
+        # maybe returning a BackgroundFit with invalid values would be preferable here
+        return None
 
-    fig = plt.figure()
+    return fit_background_ts(ts_arr, ts_type)
 
-    ax = plt.subplot(1, 1, 1)
 
-    df, loc, scale, frac_over, t_err = fit_background_ts(ts_array, ts_type)
+class DiscoverySpec(BaseModel):
+    """For a given background TS distribution, this object holds a significance value, the corresponding CDF threshold for the distribution of positive TS values and the interpolated TS value threshold."""
+
+    significance: float
+    positive_cdf_threshold: float
+    ts_threshold: float
+    # true if ts_threshold comes from Wilks' theorem
+    # i.e. 5 sigma => TS = 25
+    wilks: Optional[bool] = False
+
+    @property
+    def name(self) -> str:
+        if self.wilks:
+            return f"{self.significance} sigma (Wilks)"
+        else:
+            return f"{self.significance} sigma"
+
+    # @property
+    # def cdf_threshold(self):
+    #     return norm.cdf(self.significance)
+    #
+    # @property
+    # def positive_cdf_threshold(self, bg_fit: BackgroundFit):
+    #     return (self.cdf_threshold - bg_fit.frac_nonpositive) / bg_fit.frac_positive
+    #
+    # @property
+    # def ts_threshold(self, bg_fit: BackgroundFit):
+    #     return scipy.stats.chi2.ppf(self.positive_cdf_threshold, bg_fit.df, bg_fit.loc, bg_fit.scale)
+
+
+def calc_ts_threshold(bg_fit: BackgroundFit, significance: float) -> DiscoverySpec:
+    # cumulative fraction of trials required to reach the desired significance
+    cdf_threshold = norm.cdf(significance)
+    # determine the corresponding threshold for the CDF of the positive part of the TS distribution
+    positive_cdf_threshold = (
+        cdf_threshold - bg_fit.frac_nonpositive
+    ) / bg_fit.frac_positive
+    ts_threshold = scipy.stats.chi2.ppf(
+        positive_cdf_threshold, bg_fit.df, bg_fit.loc, bg_fit.scale
+    )
+    return DiscoverySpec(
+        significance=significance,
+        positive_cdf_threshold=positive_cdf_threshold,
+        ts_threshold=ts_threshold,
+    )
+
+
+def plot_ts_distribution(
+    ts_values: np.ndarray,
+    bg_fit: BackgroundFit,
+    significances: list[DiscoverySpec],
+    path: Path,
+    ts_val: Optional[float] = None,
+    mock_unblind: bool = False,
+) -> None:
+    # path is the output file: make sure its parent directory exists
+    path.parent.mkdir(parents=True, exist_ok=True)
+
+    # find max ts
+    ts_arr = filter_nan(ts_values)
+    max_ts = np.max(ts_arr)
+
+    if np.median(ts_arr) < 0.0:
+        # plot separately the positive and negative values of TS
+        plot_expanded_negative(ts_arr, str(path))  # plot_expanded_negative expects a string path
 
-    frac_under = 1 - frac_over
+    fig = plt.figure()
+
+    ax = plt.subplot(1, 1, 1)
 
-    five_sigma = (raw_five_sigma - frac_under) / (1.0 - frac_under)
+    # histogram was previously done in fit_background_ts()
+    ts_data, labels, colors = [], [], []
 
-    plt.axhline(frac_over * (1 - five_sigma), color="r", linestyle="--")
+    mask = ts_arr > 0.0
+    if np.sum(mask) > 0.0:
+        ts_data.append(ts_arr[mask])
+        labels.append("TS > 0")
+        colors.append("black")
+    if np.sum(~mask) > 0.0:
+        ts_data.append(ts_arr[~mask])
+        labels.append("TS <= 0")
+        colors.append("grey")
+    plt.hist(
+        ts_data,
+        bins=n_bins,
+        lw=2,
+        histtype="step",
+        color=colors,
+        label=labels,
+        density=True,
+        stacked=True,
+    )
 
-    disc_potential = scipy.stats.chi2.ppf(five_sigma, df, loc, scale)
+    for significance in significances:
+        plt.axhline(
+            bg_fit.frac_positive * (1 - significance.positive_cdf_threshold),
+            color="r",
+            linestyle="--",
+        )
+        plt.axvline(
+            significance.ts_threshold,
+            color="r",
+            label=rf"{significance.significance} $\sigma$ threshold",
+        )
 
-    x_range = np.linspace(0.0, max(max_ts, disc_potential), 100)
+    # determine range of x-axis (TS)
+    max_ts_threshold = max(
+        [significance.ts_threshold for significance in significances]
+    )
+    x_range = np.linspace(0.0, max(max_ts, max_ts_threshold), 100)
 
     plt.plot(
         x_range,
-        frac_over * scipy.stats.chi2.pdf(x_range, df, loc, scale),
+        bg_fit.frac_positive
+        * scipy.stats.chi2.pdf(x_range, bg_fit.df, bg_fit.loc, bg_fit.scale),
         color="blue",
-        label=r"$\chi^{2}$ Distribution",
+        label=r"$\chi^{2}$ distribution",
     )
 
-    if t_err is not None:
+    if bg_fit.t_err is not None:
         plt.fill_between(
             x_range,
-            frac_over * scipy.stats.chi2.pdf(x_range, df + t_err, loc, scale),
-            frac_over * scipy.stats.chi2.pdf(x_range, df - t_err, loc, scale),
+            bg_fit.frac_positive
+            * scipy.stats.chi2.pdf(
+                x_range, bg_fit.df + bg_fit.t_err, bg_fit.loc, bg_fit.scale
+            ),
+            bg_fit.frac_positive
+            * scipy.stats.chi2.pdf(
+                x_range, bg_fit.df - bg_fit.t_err, bg_fit.loc, bg_fit.scale
+            ),
             alpha=0.1,
             color="blue",
         )
 
     def integral(x):
-        return frac_under * np.sign(x) + frac_over * (
-            scipy.stats.chi2.cdf(x, df, loc, scale)
+        return bg_fit.frac_nonpositive * np.sign(x) + bg_fit.frac_positive * (
+            scipy.stats.chi2.cdf(x, bg_fit.df, bg_fit.loc, bg_fit.scale)
         )
 
     plt.plot(
@@ -355,18 +463,18 @@ def integral(x):
         label=r"1 - $\int f(x)$ (p-value)",
     )
 
-    plt.axvline(disc_potential, color="r", label=r"5 $\sigma$ Threshold")
-
     if ts_val is not None:
         if not isinstance(ts_val, float):
             ts_val = float(ts_val[0])
 
         logger.info(f"Quantifying TS: {ts_val:.2f}")
 
-        if ts_val > np.median(ts_array):
+        if ts_val > np.median(ts_arr):
             # val = (ts_val - frac_under) / (1. - frac_under)
 
-            cdf = frac_under + frac_over * scipy.stats.chi2.cdf(ts_val, df, loc, scale)
+            cdf = bg_fit.frac_nonpositive + bg_fit.frac_positive * scipy.stats.chi2.cdf(
+                ts_val, bg_fit.df, bg_fit.loc, bg_fit.scale
+            )
 
             sig = norm.ppf(cdf)
 
@@ -375,7 +483,7 @@ def integral(x):
             sig = 0.0
 
         logger.info(f"Pre-trial P-value is {1-cdf:.2E}")
-        logger.info(f"Significance is {sig:.2f} Sigma")
+        logger.info(f"Significance is {sig:.2f} sigma")
 
         plt.axvline(
             ts_val,
@@ -388,27 +496,28 @@ def integral(x):
     else:
         plt.annotate(
-            "{:.1f}".format(100 * frac_under)
+            "{:.1f}".format(100 * bg_fit.frac_nonpositive)
             + "% of data in delta. \n"
             + r"$\chi^{2}$ Distribution:"
             + "\n * d.o.f.="
-            + "{:.2f} \pm {:.2f}".format(df, t_err)
+            + r"{:.2f} \pm {:.2f}".format(bg_fit.df, bg_fit.t_err)
             + ",\n * loc="
-            + "{:.2f}".format(loc)
+            + "{:.2f}".format(bg_fit.loc)
             + " \n * scale="
-            + "{:.2f}".format(scale),
+            + "{:.2f}".format(bg_fit.scale),
             xy=(0.1, 0.2),
             xycoords="axes fraction",
             fontsize=8,
         )
 
-    yrange = min(
-        1.0 / (float(len(ts_array)) * n_bins),
-        scipy.stats.chi2.pdf(disc_potential, df, loc, scale),
-    )
+    # yrange was defined but not used, keep it commented for now
+    # yrange = min(
+    #     1.0 / (float(len(ts_arr)) * n_bins),
+    #     scipy.stats.chi2.pdf(disc_potential, bg_fit.df, bg_fit.loc, bg_fit.scale),
+    # )
 
     plt.yscale("log")
-    plt.xlabel(r"Test Statistic ($\lambda$)")
+    plt.xlabel(r"Test statistic ($\lambda$)")
     plt.legend(loc="upper right")
 
     if mock_unblind:
@@ -417,8 +526,6 @@ def integral(x):
     plt.savefig(path)
     plt.close()
 
-    return disc_potential
-
 
 def plot_fit_results(results, path, inj):
     # results = np.array(results)
diff --git a/flarestack/data/__init__.py b/flarestack/data/__init__.py
index 818c1f61..95d10e11 100644
--- a/flarestack/data/__init__.py
+++ b/flarestack/data/__init__.py
@@ -188,7 +188,7 @@ def data_background_model(self, **kwargs):
         exp = append_fields(exp, "weight", weight, usemask=False, dtypes=[float]).copy()
         return exp
 
-    def get_background_model(self, **kwargs):
+    def get_background_model(self) -> dict:
         """Generic Function to return background model. This could be
         the experimental data (if the signal contamination is small),
         or a weighted MC dataset. By default, uses data."""
diff --git a/flarestack/data/dataset_index.py b/flarestack/data/dataset_index.py
index 2e9b1a36..ef8403c8 100644
--- a/flarestack/data/dataset_index.py
+++ b/flarestack/data/dataset_index.py
@@ -15,7 +15,7 @@ class DatasetIndex:
 
     def __init__(self) -> None:
         """constructor"""
-        self.index = dict()
+        self.index: dict[str, Dataset] = dict()
 
     def add_dataset(self, dataset: Dataset) -> None:
         """adds a dataset to the index
@@ -26,19 +26,21 @@ def add_dataset(self, dataset: Dataset) -> None:
         """
         self.index[dataset.name] = dataset
 
-    def add_alias(self, alias: str, name: str) -> None:
-        """adds an alias for a dataset
-
-        Args:
-            alias (str): alias name
-            name (str): dataset name
-        """
-        dest = self.index[name]
-        if isinstance(dest, Dataset):
-            self.index[alias] = name
-        else:
-            logger.warning("f{name} is already an alias, aliasing {dest} instead.")
-            self.index[alias] = dest
+    # supporting aliases does not play well with the typing consistency of dataset_index
+    # tentatively comment out this logic, can be removed at a later stage
+    # def add_alias(self, alias: str, name: str) -> None:
+    #     """adds an alias for a dataset
+    #
+    #     Args:
+    #         alias (str): alias name
+    #         name (str): dataset name
+    #     """
+    #     dest = self.index[name]
+    #     if isinstance(dest, Dataset):
+    #         self.index[alias] = name
+    #     else:
+    #         logger.warning("f{name} is already an alias, aliasing {dest} instead.")
+    #         self.index[alias] = dest
 
     def get_dataset(self, name: str) -> Dataset:
         """retrieve a dataset by name
diff --git a/flarestack/data/icecube/ic_season.py b/flarestack/data/icecube/ic_season.py
index 798c7550..358254bf 100644
--- a/flarestack/data/icecube/ic_season.py
+++ b/flarestack/data/icecube/ic_season.py
@@ -11,12 +11,12 @@
 from scipy.interpolate import interp1d
 import logging
 from pathlib import Path
-from typing import Tuple
+from typing import Optional
 
 logger = logging.getLogger(__name__)
 
-icecube_dataset_dir = os.environ.get("FLARESTACK_DATASET_DIR")
+flarestack_dataset_dir: Optional[str] = os.environ.get("FLARESTACK_DATASET_DIR")
 
 """
 Source data on the WIPAC cluster.
@@ -45,19 +45,24 @@
 """
 mirror_7yr_dirname = "mirror-7year-PS-sens"  # expected identical at all mirrors
 
-if icecube_dataset_dir is not None:
-    logger.info(f"Loading datasets from {icecube_dataset_dir} (local)")
+# NOTE: the following block is somewhat convoluted and the logic should be restructured.
+if flarestack_dataset_dir is not None:
+    logger.info(f"Loading datasets from {flarestack_dataset_dir} (local)")
 
-    icecube_dataset_dir = Path(icecube_dataset_dir)
+    icecube_dataset_dir = Path(flarestack_dataset_dir)
 
-    ref_dir_7yr = icecube_dataset_dir / mirror_7yr_dirname
-    if not ref_dir_7yr.is_dir():
-        logger.warning(f"No 7yr sensitivity directory found at {ref_dir_7yr}")
+    ref_7yr_path: Path = icecube_dataset_dir / mirror_7yr_dirname
+    if ref_7yr_path.is_dir():
+        ref_dir_7yr: Optional[Path] = ref_7yr_path
+    else:
+        logger.warning(f"No 7yr sensitivity directory found at {ref_7yr_path}")
         ref_dir_7yr = None
 
-    ref_10yr = icecube_dataset_dir / ref_10yr_filename
-    if not ref_10yr.is_file():
-        logger.warning(f"No 10yr sensitivity found at {ref_10yr}")
+    ref_10yr_path: Path = icecube_dataset_dir / ref_10yr_filename
+    if ref_10yr_path.is_file():
+        ref_10yr: Optional[Path] = ref_10yr_path
+    else:
+        logger.warning(f"No 10yr sensitivity found at {ref_10yr_path}")
         ref_10yr = None
 else:
     logger.info(
@@ -68,7 +73,7 @@
 DESY_sens_path = DESY_data_path / "ref_sensitivity"
 
 # Only load from central storage if $FLARESTACK_DATASET_DIR is not set.
-if icecube_dataset_dir is None:
+if flarestack_dataset_dir is None:
     # NOTE: the following block has no failsafe against changes in the directory structure.
     if host_server == "DESY":
         icecube_dataset_dir = DESY_data_path
@@ -98,7 +103,7 @@ def get_dataset_dir() -> str:
     return dataset_dir
 
 
-def get_published_sens_ref_dir() -> Tuple[Path]:
+def get_published_sens_ref_dir() -> tuple[Path, Path]:
     """
     Returns the paths to reference sensitivities.
     """
diff --git a/flarestack/data/icecube/public/IC86_1/parse_public_IC86_1.py b/flarestack/data/icecube/public/IC86_1/parse_public_IC86_1.py
index 84eb2530..c1bf6506 100644
--- a/flarestack/data/icecube/public/IC86_1/parse_public_IC86_1.py
+++ b/flarestack/data/icecube/public/IC86_1/parse_public_IC86_1.py
@@ -1,3 +1,10 @@
+# type: ignore
+
+# exclude this file from `mypy` checking since the attribute IC86_1_dict can no longer be imported
+# this also implies that this file is currently untested, otherwise such a test would fail
+# if in the future its functionality is restored, remove the `type: ignore` comment
+
+
 """Script to convert public data files provided by IceCube for the first
 year of IC86_1 into a format useable for science with flarestack.
 
 The data files themselves are duplicates of those provided at:
diff --git a/flarestack/data/simulate/simcube.py b/flarestack/data/simulate/simcube.py
index 602195ff..b78e120b 100644
--- a/flarestack/data/simulate/simcube.py
+++ b/flarestack/data/simulate/simcube.py
@@ -236,19 +236,23 @@ def wrapper_f(
 simcube_dataset.add_sim_season(name, wrapper_f)
 
 
-bkg_time_pdf_dict = {
-    "time_pdf_name": "fixed_ref_box",
-    "fixed_ref_time_mjd": 50000,
-    "pre_window": 0.0,
-    "post_window": 500.0,
-}
-
-simcube_season = simcube_dataset.set_sim_params(
-    name="IC86-2012",
-    bkg_time_pdf_dict=bkg_time_pdf_dict,
-    bkg_flux_norm=1e8,
-    bkg_e_pdf_dict=bkg_e_pdf_dict,
-)
+# after mypy: it seems the following block of code is not compatible with the current syntax of `simcube_dataset.set_sim_params()`
+# - error: Missing positional argument "bkg_flux_model" in call to "set_sim_params" [call-arg]
+# this implies this module is likely untested, otherwise such a test would fail
+# the following is commented out for the time being
+
+# bkg_time_pdf_dict = {
+#     "time_pdf_name": "fixed_ref_box",
+#     "fixed_ref_time_mjd": 50000,
+#     "pre_window": 0.0,
+#     "post_window": 500.0,
+# }
+#
+# simcube_season = simcube_dataset.set_sim_params(
+#     name="IC86-2012",
+#     bkg_time_pdf_dict=bkg_time_pdf_dict,
+#     bkg_flux_norm=1e8,
+#     bkg_e_pdf_dict=bkg_e_pdf_dict,
+# )
 
 # nicecube_10year = SimCubeSeason(0, 100, 1., e_pdf_dict)
-#
diff --git a/flarestack/misc/flare_llh_plots.py b/flarestack/misc/flare_llh_plots.py
index 9d4578b7..e606ca75 100644
--- a/flarestack/misc/flare_llh_plots.py
+++ b/flarestack/misc/flare_llh_plots.py
@@ -102,11 +102,14 @@ def energy_ratio(e):
 
 n_bkg = 6
 
-s_t = []
+s_t_list = []
 for i, t in enumerate(np.random.uniform(0, 1.0, n_sig)):
-    s_t.append((t + float(i)) * 25)
-s_t = np.array(s_t)
+    s_t_list.append((t + float(i)) * 25)
+
+s_t = np.array(s_t_list)
+del s_t_list
+
 bkg_t = np.random.uniform(0, 1.0, n_bkg) * 100
 
 all_t = np.concatenate((s_t, bkg_t))
diff --git a/flarestack/misc/plot_data_rate.py b/flarestack/misc/plot_data_rate.py
index 6b598d30..8bb3e423 100644
--- a/flarestack/misc/plot_data_rate.py
+++ b/flarestack/misc/plot_data_rate.py
@@ -1,3 +1,9 @@
+# type: ignore
+
+# exclude this file from `mypy` checking since the dataset "nt_v002_p01" can no longer be imported
+# this also implies that this file is currently untested, otherwise such a test would fail
+# if in the future this script is updated to use a new data set, remove the `type: ignore` comment
+
 import os
 import matplotlib.pyplot as plt
 from flarestack.data.icecube.northern_tracks.nt_v002_p01 import diffuse_8year
diff --git a/flarestack/misc/verify_discovery_potential_estimation.py b/flarestack/misc/verify_discovery_potential_estimation.py
index b5c16959..e887a594 100644
--- a/flarestack/misc/verify_discovery_potential_estimation.py
+++ b/flarestack/misc/verify_discovery_potential_estimation.py
@@ -1,3 +1,9 @@
+# type: ignore
+
+# exclude this file from `mypy` checking since the current implementation of `AsimovEstimator` is not compatible with the syntax used here.
+# this also implies that this file is currently untested, otherwise such a test would fail
+# if in the future its functionality is restored, remove the `type: ignore` comment
+
 import numpy as np
 import os
 from flarestack.data.icecube.ps_tracks.ps_v002_p01 import ps_v002_p01
@@ -84,7 +90,7 @@
         mask = np.ones_like(sindecs, dtype=bool)
 
     else:
-        mask = [sindecs > -0.05]
+        mask = sindecs > -0.05  # after mypy: changed from [sindecs > -0.05]
 
     color = ["r", "blue", "green", "y"][j]
diff --git a/flarestack/shared.py b/flarestack/shared.py
index 67d7837d..485cc59a 100644
--- a/flarestack/shared.py
+++ b/flarestack/shared.py
@@ -6,6 +6,7 @@
 import zlib
 import logging
 from pathlib import Path
+from typing import Optional
 
 logger = logging.getLogger(__name__)
 
@@ -115,12 +116,12 @@
 host = socket.getfqdn()
 
+host_server: Optional[str] = None
+
 if np.logical_or("ifh.de" in host, "zeuthen.desy.de" in host):
     host_server = "DESY"
 elif "icecube.wisc.edu" in host:
     host_server = "WIPAC"
-else:
-    host_server = None
 
 # gamma_range = [1., 4.]
 # gamma_precision = .025
diff --git a/flarestack/utils/asimov_estimator.py b/flarestack/utils/asimov_estimator.py
index f1d1733f..3333e7f7 100644
--- a/flarestack/utils/asimov_estimator.py
+++ b/flarestack/utils/asimov_estimator.py
@@ -162,9 +162,7 @@ def signalness(sig_over_background):
 
             if len(source_mc) == 0:
                 logging.warning(
-                    "Warning, no MC found for dummy source at declinbation ".format(
-                        np.arcsin(lower), np.arcsin(upper)
-                    )
+                    f"Warning, no MC found for dummy source at declination {np.arcsin(lower)} {np.arcsin(upper)}"
                 )
                 ts_vals.append(0.0)
                 n_sigs.append(0.0)
diff --git a/flarestack/utils/percentile_SoB.py b/flarestack/utils/percentile_SoB.py
index 30cea302..c95bfefa 100644
--- a/flarestack/utils/percentile_SoB.py
+++ b/flarestack/utils/percentile_SoB.py
@@ -1,3 +1,10 @@
+# type: ignore
+
+# exclude this file from `mypy` checking since IC86_1_dict can no longer be imported
+# this also implies that this file is currently untested, otherwise such a test would fail
+# if in the future its functionality is restored, remove the `type: ignore` comment
+
+
 import os
 import numpy as np
 import scipy
diff --git a/mypy.ini b/mypy.ini
new file mode 100644
index 00000000..efc46314
--- /dev/null
+++ b/mypy.ini
@@ -0,0 +1,24 @@
+[mypy]
+packages = flarestack
+exclude = analyses
+
+[mypy-astropy]
+ignore_missing_imports = True
+
+[mypy-astropy.coordinates]
+ignore_missing_imports = True
+
+[mypy-astropy.cosmology]
+ignore_missing_imports = True
+
+[mypy-healpy]
+ignore_missing_imports = True
+
+[mypy-scipy.*]
+ignore_missing_imports = True
+
+[mypy-matplotlib.*]
+ignore_missing_imports = True
+
+[mypy-numexpr]
+ignore_missing_imports = True
\ No newline at end of file
diff --git a/poetry.lock b/poetry.lock
index c611acd7..b5b59ddd 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.6.0 and should not be changed by hand.
 
 [[package]]
 name = "alabaster"
@@ -1503,15 +1503,61 @@ files = [
     {file = "mistune-2.0.4.tar.gz", hash = "sha256:9ee0a66053e2267aba772c71e06891fa8f1af6d4b01d5e84e267b4570d4d9808"},
 ]
 
+[[package]]
+name = "mypy"
+version = "1.5.1"
+description = "Optional static typing for Python"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "mypy-1.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f33592ddf9655a4894aef22d134de7393e95fcbdc2d15c1ab65828eee5c66c70"},
+    {file = "mypy-1.5.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:258b22210a4a258ccd077426c7a181d789d1121aca6db73a83f79372f5569ae0"},
+    {file = "mypy-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9ec1f695f0c25986e6f7f8778e5ce61659063268836a38c951200c57479cc12"},
+    {file = "mypy-1.5.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:abed92d9c8f08643c7d831300b739562b0a6c9fcb028d211134fc9ab20ccad5d"},
+    {file = "mypy-1.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:a156e6390944c265eb56afa67c74c0636f10283429171018446b732f1a05af25"},
+    {file = "mypy-1.5.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6ac9c21bfe7bc9f7f1b6fae441746e6a106e48fc9de530dea29e8cd37a2c0cc4"},
+    {file = "mypy-1.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:51cb1323064b1099e177098cb939eab2da42fea5d818d40113957ec954fc85f4"},
+    {file = "mypy-1.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:596fae69f2bfcb7305808c75c00f81fe2829b6236eadda536f00610ac5ec2243"},
+    {file = "mypy-1.5.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:32cb59609b0534f0bd67faebb6e022fe534bdb0e2ecab4290d683d248be1b275"},
+    {file = "mypy-1.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:159aa9acb16086b79bbb0016145034a1a05360626046a929f84579ce1666b315"},
+    {file = "mypy-1.5.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f6b0e77db9ff4fda74de7df13f30016a0a663928d669c9f2c057048ba44f09bb"},
+    {file = "mypy-1.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:26f71b535dfc158a71264e6dc805a9f8d2e60b67215ca0bfa26e2e1aa4d4d373"},
+    {file = "mypy-1.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fc3a600f749b1008cc75e02b6fb3d4db8dbcca2d733030fe7a3b3502902f161"},
+    {file = "mypy-1.5.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:26fb32e4d4afa205b24bf645eddfbb36a1e17e995c5c99d6d00edb24b693406a"},
+    {file = "mypy-1.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:82cb6193de9bbb3844bab4c7cf80e6227d5225cc7625b068a06d005d861ad5f1"},
+    {file = "mypy-1.5.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4a465ea2ca12804d5b34bb056be3a29dc47aea5973b892d0417c6a10a40b2d65"},
+    {file = "mypy-1.5.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9fece120dbb041771a63eb95e4896791386fe287fefb2837258925b8326d6160"},
+    {file = "mypy-1.5.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d28ddc3e3dfeab553e743e532fb95b4e6afad51d4706dd22f28e1e5e664828d2"},
+    {file = "mypy-1.5.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:57b10c56016adce71fba6bc6e9fd45d8083f74361f629390c556738565af8eeb"},
+    {file = "mypy-1.5.1-cp38-cp38-win_amd64.whl", hash = "sha256:ff0cedc84184115202475bbb46dd99f8dcb87fe24d5d0ddfc0fe6b8575c88d2f"},
+    {file = "mypy-1.5.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8f772942d372c8cbac575be99f9cc9d9fb3bd95c8bc2de6c01411e2c84ebca8a"},
+    {file = "mypy-1.5.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5d627124700b92b6bbaa99f27cbe615c8ea7b3402960f6372ea7d65faf376c14"},
+    {file = "mypy-1.5.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:361da43c4f5a96173220eb53340ace68cda81845cd88218f8862dfb0adc8cddb"},
+    {file = "mypy-1.5.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:330857f9507c24de5c5724235e66858f8364a0693894342485e543f5b07c8693"},
+    {file = "mypy-1.5.1-cp39-cp39-win_amd64.whl", hash = "sha256:c543214ffdd422623e9fedd0869166c2f16affe4ba37463975043ef7d2ea8770"},
+    {file = "mypy-1.5.1-py3-none-any.whl", hash = "sha256:f757063a83970d67c444f6e01d9550a7402322af3557ce7630d3c957386fa8f5"},
+    {file = "mypy-1.5.1.tar.gz", hash = "sha256:b031b9601f1060bf1281feab89697324726ba0c0bae9d7cd7ab4b690940f0b92"},
+]
+
+[package.dependencies]
+mypy-extensions = ">=1.0.0"
+tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
+typing-extensions = ">=4.1.0"
+
+[package.extras]
+dmypy = ["psutil (>=4.0)"]
+install-types = ["pip"]
+reports = ["lxml"]
+
 [[package]]
 name = "mypy-extensions"
-version = "0.4.3"
-description = "Experimental type system extensions for programs checked with the mypy typechecker."
+version = "1.0.0"
+description = "Type system extensions for programs checked with the mypy type checker."
 optional = false
-python-versions = "*"
+python-versions = ">=3.5"
 files = [
-    {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"},
-    {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"},
+    {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
+    {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
 ]
 
 [[package]]
@@ -2741,7 +2787,7 @@ files = [
 ]
 
 [package.dependencies]
-greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\")"}
+greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"}
 
 [package.extras]
 aiomysql = ["aiomysql", "greenlet (!=0.4.17)"]
@@ -2957,4 +3003,4 @@ testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>=
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8, <3.12"
-content-hash = "a784fdbab28c5cd1ccd3dad39ca1c2f0bf50247a51a2ca386fb3a56d6e29ce27"
+content-hash = "cd3a999f9b1632f49bb00f18cfe8324ec9da2982ee21f8565942d09e970db74f"
diff --git a/pyproject.toml b/pyproject.toml
index 547daa38..27910f65 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,13 +17,13 @@ classifiers = [
     "License :: OSI Approved :: MIT License",
     "Operating System :: OS Independent",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
 ]
 
 [tool.poetry.dependencies]
-python = ">=3.8, <3.12"
+python = ">=3.9, <3.12"
 astropy = "^5.1"
 black = "^23"
 coveralls = "^3.3.1"
@@ -47,6 +47,10 @@ myst-parser = "^0.18.0"
 nbsphinx = ">=0.8.9,<0.10.0"
 myst-nb = ">=0.16,<0.18"
 
+
+[tool.poetry.group.dev.dependencies]
+mypy = "^1.5.1"
+
 [build-system]
 requires = ["poetry-core>=1.2.0"]
 build-backend = "poetry.core.masonry.api"
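
A note on the `subclasses: dict[str, object]` annotations added throughout this diff: the registry they type looks roughly like the sketch below. This is an illustration only; the class name `BasePDF` and the decorator body are assumptions inferred from the `@BaseFloorClass.register_subclass(...)` usage visible above, not code copied from flarestack. Typing the registry as `dict[str, Type[...]]` rather than `dict[str, object]` would also answer the question raised in `submitter.py` more strictly, since mypy could then check the instantiation in `create()`.

```python
# Illustrative sketch of the register_subclass registry pattern;
# names and bodies are assumptions based on the usage visible in this diff.
from typing import Callable, Type


class BasePDF:
    # dict[str, Type["BasePDF"]] is stricter than dict[str, object]:
    # mypy can then type-check the instantiation in create()
    subclasses: dict[str, Type["BasePDF"]] = {}

    def __init__(self, pdf_dict: dict) -> None:
        self.pdf_dict = pdf_dict

    @classmethod
    def register_subclass(cls, name: str) -> Callable[[Type["BasePDF"]], Type["BasePDF"]]:
        def decorator(subclass: Type["BasePDF"]) -> Type["BasePDF"]:
            # store the class itself, keyed by its registered name
            cls.subclasses[name] = subclass
            return subclass

        return decorator

    @classmethod
    def create(cls, pdf_dict: dict) -> "BasePDF":
        # look up the registered subclass and instantiate it
        return cls.subclasses[pdf_dict["name"]](pdf_dict)


@BasePDF.register_subclass("power_law")
class PowerLawPDF(BasePDF):
    pass
```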
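For reference, the significance-to-threshold conversion implemented in `calc_ts_threshold` can be exercised numerically as below. Only the formulas mirror the diff; the chi2 fit parameters and the positive-trial fraction are invented for illustration.

```python
# Worked sketch of the conversion in calc_ts_threshold(); the values of
# df, loc, scale and frac_positive are invented, not real fit results.
import scipy.stats
from scipy.stats import norm

df, loc, scale = 1.3, 0.0, 1.0  # assumed chi2 background fit (BackgroundFit)
frac_positive = 0.55            # assumed fraction of background trials with TS > 0
frac_nonpositive = 1 - frac_positive

significance = 5.0
# cumulative fraction of trials required to reach the desired significance
cdf_threshold = norm.cdf(significance)

# rescale to the CDF of the positive part of the TS distribution,
# accounting for the delta peak of trials with TS <= 0
positive_cdf_threshold = (cdf_threshold - frac_nonpositive) / frac_positive

ts_threshold = scipy.stats.chi2.ppf(positive_cdf_threshold, df, loc, scale)
print(f"TS threshold for {significance} sigma: {ts_threshold:.1f}")
```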
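Similarly, the gamma-CDF interpolation used in `find_disc_potential` can be run standalone. The injection scales and overfluctuation fractions below are invented; only the fit function and the `p0` starting values follow the diff.

```python
# Standalone sketch of the discovery-potential interpolation: fit a gamma CDF
# to the overfluctuation fraction vs injection scale, then invert it at 50%.
# The data points are invented for illustration.
import numpy as np
import scipy.optimize
import scipy.stats


def f(x, a, b, c):
    # gamma CDF, as in find_disc_potential()
    return scipy.stats.gamma.cdf(x, a=a, loc=b, scale=c)


x = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0])       # injection scales
y = np.array([0.0, 0.08, 0.35, 0.62, 0.84, 0.95])  # fraction above TS threshold

(best_a, best_b, best_c), _ = scipy.optimize.curve_fit(
    f, x, y, p0=[6, -0.1 * max(x), 0.1 * max(x)]
)

# scale at which 50% of trials exceed the TS threshold
discovery_scale = scipy.stats.gamma.ppf(0.5, best_a, best_b, best_c)
print(f"discovery potential scale: {discovery_scale:.2f}")
```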