diff --git a/setup.cfg b/setup.cfg
index 8ad280c2..a9ca0efc 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -35,8 +35,10 @@ packages =
     kbmod.search
     kbmod.analysis
     kbmod.filters
+    kbmod.standardizers
+    kbmod.standardizers.fits_standardizers
 zip_safe = "False"
-install_requires = 
+install_requires =
     astropy>=5.1
     astroquery>=0.4.6
     matplotlib>=3.5
@@ -48,6 +50,7 @@ install_requires =
     jplephem
     PyYAML>=6.0
     chardet
+    astro_metadata_translator>=25.2023.2200
 
 [options.extras_require]
 analysis =
diff --git a/src/kbmod/__init__.py b/src/kbmod/__init__.py
index f8a3b983..544674a0 100644
--- a/src/kbmod/__init__.py
+++ b/src/kbmod/__init__.py
@@ -5,9 +5,6 @@
 except ImportError:
     warnings.warn("Unable to determine the package version. "
                   "This is likely a broken installation.")
-# lazy import analysis to arrest
-# loading large libraries in them
-# import the filters subdirectory
 from . import (
     analysis,
     analysis_utils,
@@ -18,3 +15,7 @@
     result_list,
     run_search,
 )
+
+from .search import PSF, RawImage, LayeredImage, ImageStack, StackSearch
+from .standardizers import Standardizer, StandardizerConfig
+from .image_collection import ImageCollection
diff --git a/src/kbmod/image_collection.py b/src/kbmod/image_collection.py
new file mode 100644
index 00000000..2a216c1a
--- /dev/null
+++ b/src/kbmod/image_collection.py
@@ -0,0 +1,512 @@
+"""Classes for working with the input files for KBMOD.
+
+The ``ImageCollection`` class stores additional information for the
+input FITS files that is used during a variety of analyses.
+"""
+
+import os
+import glob
+import json
+
+from astropy.table import Table
+from astropy.wcs import WCS
+from astropy.utils import isiterable
+
+import numpy as np
+
+from kbmod.search import ImageStack
+from .standardizers import Standardizer
+from .work_unit import WorkUnit
+
+
+__all__ = [
+    "ImageCollection",
+]
+
+
+class ImageCollection:
+    """A collection of basic pointings, file paths, names, timestamps and
+    other metadata that facilitate the construction of an ImageStack and the
+    execution of a KBMOD search.
+
+    It is recommended to construct this object by using one of its factory
+    methods rather than instantiating it directly. When constructed via one
+    of the ``from*`` methods the class interprets the data-source formats and
+    determines the appropriate way of extracting, at minimum, the required
+    metadata from them. This behaviour can be modified by supplying a
+    callable as the ``force`` argument to the factory method. The provided
+    callable has to be an instance of `Standardizer`. See the factory
+    methods' and `Standardizer` documentation for more information.
+
+    Columns listed in `ImageCollection.required_metadata` are required to
+    exist and be non-null. Additional columns may or may not exist. The names
+    of the standardizers used to extract the metadata are also guaranteed to
+    exist. The `Standardizer` objects may or may not be available, depending
+    on whether the data is accessible to the user. The same, by extension,
+    applies to properties such as bbox and WCS.
+    Attributes whose values may or may not be available have their own getter
+    methods (e.g. `get_standardizer`, `get_wcs` etc.).
+
+    Some standardizers, such as `MultiExtensionFits`, map a single location
+    and timestamp to potentially many individual extensions, each with a
+    slightly different on-sky location. Therefore the list of standardizers
+    does not necessarily match the length of the metadata table.
+    The metadata table holds the metadata per extension, unravelled into a
+    flat table.
+
+    Parameters
+    ----------
+    metadata : `~astropy.table.Table`
+        A table of exposure metadata properties.
+    standardizers : `list`
+        A list of `Standardizer` objects used to extract the metadata in
+        their rows.
+
+    Attributes
+    ----------
+    data : `~astropy.table.Table`
+        Table of exposure metadata properties.
+    standardizers : `list`
+        Standardizer names used to standardize the rows of the metadata.
+        The names are guaranteed to exist, but the standardizer object may
+        not be available.
+    wcs : `list`
+        List of `~astropy.wcs.WCS` objects that correspond to the rows
+        of the metadata.
+    bbox : `list`
+        List of `dict` objects containing the pixel and on-sky coordinates of
+        the central and corner (0, 0) pixel.
+
+    Raises
+    ------
+    ValueError :
+        when instantiated from a Table which does not have the required
+        columns, or has null values in the required columns.
+    """
+
+    required_metadata = ["location", "mjd", "ra", "dec"]
+    _supporting_metadata = ["std_name", "std_idx", "ext_idx", "wcs", "bbox", "config"]
+
+    ########################
+    # CONSTRUCTORS
+    ########################
+    def _validate(self, metadata):
+        """Validates that the required metadata exists and is non-null.
+
+        Required metadata are the ``location`` of the target, ``mjd``, ``ra``,
+        ``dec`` and the standardizer-row lookup indices ``std_idx`` and
+        ``ext_idx``.
+
+        Parameters
+        ----------
+        metadata : `~astropy.table.Table`
+            Astropy Table containing the required metadata.
+
+        Returns
+        -------
+        valid : `bool`
+            When `True` the metadata is valid, when `False` it isn't.
+        message : `str`
+            Validation failure message, if any.
+        """
+        if not isinstance(metadata, Table):
+            return False, "not an Astropy Table object."
+
+        # if empty table
+        if not metadata:
+            return False, "an empty table."
+
+        cols = metadata.columns
+        missing_keys = [key for key in self.required_metadata if key not in cols]
+        if missing_keys:
+            return False, f"missing required columns: {missing_keys}"
+
+        # check that none of the required column entries are empty in some
+        # way. Perhaps we should be checking np.nan too?
+        for rc in self.required_metadata:
+            if None in metadata[rc] or "" in metadata[rc]:
+                return False, "missing required metadata values."
+
+        # check that the standardizer-to-row lookup exists
+        missing_keys = [key for key in ["std_idx", "ext_idx"] if key not in cols]
+        if missing_keys:
+            return False, f"missing required standardizer-row lookup indices: {missing_keys}"
+
+        return True, ""
+
+    def __init__(self, metadata, standardizers=None):
+        valid, explanation = self._validate(metadata)
+        if valid:
+            metadata.sort("mjd")
+            self.data = metadata
+        else:
+            raise ValueError(f"Metadata is {explanation}")
+
+        # If standardizers are already instantiated, keep them. This keeps any
+        # resources they are holding onto alive, and enables in-memory stds.
+        # These are impossible to re-instantiate, but since they are already
+        # in memory we don't need to, and lazy loading will skip the attempt.
+        # Unrelated: if it doesn't exist, add a "std_name" column to the
+        # metadata and update the n_stds entry. This is shared by all from*
+        # constructors, so it's practical to do it here.
+        # Otherwise, if standardizers are not given, assume they round-trip
+        # from the rows and build an empty private list of stds for lazy
+        # evaluation. The length of the list is determined from the table
+        # metadata, or guessed from the number of unique targets. The table
+        # metadata is updated if necessary.
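+        # For example (hypothetical values): three targets standardized up
+        # front give self._standardizers = array([std1, std2, std3]), while a
+        # collection read back from disk starts as array([None, None, None])
+        # and each slot is filled on first access via get_standardizer().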
+        if standardizers is not None:
+            self._standardizers = np.array(standardizers)
+            if "std_name" not in metadata.columns:
+                self.data["std_name"] = self._standardizer_names
+            self.data.meta["n_stds"] = len(standardizers)
+        else:
+            n_stds = metadata.meta.get("n_stds", None)
+            if n_stds is None:
+                n_stds = len(np.unique(metadata["location"]))
+                self.data.meta["n_stds"] = n_stds
+            self._standardizers = np.full((n_stds,), None)
+
+        self._userColumns = [col for col in self.data.columns if col not in self._supporting_metadata]
+
+    @classmethod
+    def read(cls, *args, format=None, units=None, descriptions=None, **kwargs):
+        """Create an ImageCollection from a file containing a serialized
+        image collection.
+
+        Parameters
+        ----------
+        filepath : `str`
+            Path to the file containing the serialized image collection.
+
+        Returns
+        -------
+        ic : `ImageCollection`
+            Image Collection
+        """
+        metadata = Table.read(*args, format=format, units=units, descriptions=descriptions, **kwargs)
+        metadata["wcs"] = [WCS(w) if w is not None else None for w in metadata["wcs"]]
+        metadata["bbox"] = [json.loads(b) for b in metadata["bbox"]]
+        metadata["config"] = [json.loads(c) for c in metadata["config"]]
+        meta = json.loads(metadata.meta["comments"][0])
+        metadata.meta = meta
+        return cls(metadata)
+
+    @classmethod
+    def fromStandardizers(cls, standardizers, meta=None):
+        """Create an ImageCollection from a collection of `Standardizer`
+        objects.
+
+        The `Standardizer` is "unravelled", i.e. the shared metadata is
+        duplicated for each entry marked as processable by the standardizer.
+        As a practical example: MJD timestamps are shared by all 62 science
+        exposures created by DECam in a single exposure, but the pointing of
+        each one isn't. So each pointing becomes a new row in the metadata
+        table, one for each of the individual pointings.
+
+        Parameters
+        ----------
+        standardizers : `iterable`
+            Collection of `Standardizer` objects.
+
+        Returns
+        -------
+        ic : `ImageCollection`
+            Image Collection
+        """
+        unravelledStdMetadata = []
+        for i, stdFits in enumerate(standardizers):
+            # needs a "validate standardized" method here or in standardizers
+            stdMeta = stdFits.standardizeMetadata()
+
+            # unravel all standardized keys whose values are iterables, unless
+            # they are a string. "Unravelling" means that each processable
+            # item of the standardizer gets its own row. Each non-iterable
+            # standardized item is copied into that row. E.g. "a.fits" with 3
+            # images becomes
+            #     location  std_vals
+            #     a.fits    ...1
+            #     a.fits    ...2
+            #     a.fits    ...3
+            unravelColumns = [
+                key for key, val in stdMeta.items() if isiterable(val) and not isinstance(val, str)
+            ]
+            for j, ext in enumerate(stdFits.processable):
+                row = {}
+                for key in stdMeta.keys():
+                    if key in unravelColumns:
+                        row[key] = stdMeta[key][j]
+                    else:
+                        row[key] = stdMeta[key]
+                row["std_idx"] = i
+                row["ext_idx"] = j
+                row["std_name"] = stdFits.name
+                unravelledStdMetadata.append(row)
+
+        # We could even track things like `whoami`, `uname`, `time` etc.
+        meta = meta if meta is not None else {"n_stds": len(standardizers)}
+        metadata = Table(rows=unravelledStdMetadata, meta=meta)
+        return cls(metadata=metadata, standardizers=standardizers)
+
+    @classmethod
+    def fromTargets(cls, tgts, force=None, config=None, **kwargs):
+        """Instantiate an ImageCollection from a collection of targets
+        recognized by the standardizers, for example file paths, integer IDs,
+        dataset reference objects etc.
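+
+        A minimal usage sketch (hypothetical file paths), which relies on
+        `Standardizer.get` to resolve an appropriate standardizer for each
+        target::
+
+            >>> ic = ImageCollection.fromTargets(["a.fits", "b.fits"])  # doctest: +SKIP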
+
+        Parameters
+        ----------
+        tgts : `iterable`
+            Collection of file paths, URIs, integer IDs, dataset references
+            or other targets recognized by the standardizers.
+        force : `Standardizer` or `None`, optional
+            If `None`, when applicable, determine the correct `Standardizer`
+            to use automatically. Otherwise force the use of the given
+            `Standardizer`.
+        config : `StandardizerConfig`, `dict` or `None`, optional
+            Configuration key-values used when standardizing the targets.
+        **kwargs : `dict`
+            Remaining kwargs, not listed here, are passed onwards to
+            the underlying `Standardizer`.
+
+        Raises
+        ------
+        ValueError :
+            when a target is not recognized as a file, directory or URI.
+        """
+        standardizers = [Standardizer.get(tgt, force=force, config=config, **kwargs) for tgt in tgts]
+        return cls.fromStandardizers(standardizers)
+
+    @classmethod
+    def fromDir(cls, dirpath, recursive=False, force=None, config=None, **kwargs):
+        """Instantiate an ImageCollection from a path to a directory
+        containing FITS files.
+
+        Parameters
+        ----------
+        dirpath : `str`
+            Path to a directory containing FITS files.
+        recursive : `bool`
+            If `True`, scan the directory recursively, including all
+            sub-directories.
+        force : `Standardizer` or `None`, optional
+            If `None`, when applicable, determine the correct `Standardizer`
+            to use automatically. Otherwise force the use of the given
+            `Standardizer`.
+        config : `StandardizerConfig`, `dict` or `None`, optional
+            Configuration key-values used when standardizing the files.
+        **kwargs : `dict`
+            Remaining kwargs, not listed here, are passed onwards to
+            the underlying `Standardizer`.
+        """
+        # note: glob only expands recursively when the pattern contains "**"
+        pattern = os.path.join("**", "*fits*") if recursive else "*fits*"
+        fits_files = glob.glob(os.path.join(dirpath, pattern), recursive=recursive)
+        return cls.fromTargets(fits_files, force=force, config=config, **kwargs)
+
+    ########################
+    # PROPERTIES (type operations and invariants)
+    ########################
+    def __str__(self):
+        return str(self.data[self._userColumns])
+
+    def __repr__(self):
+        return repr(self.data).replace("Table", "ImageCollection")
+
+    # Jupyter notebook hook for rendering output as HTML
+    def _repr_html_(self):
+        return self.data[self._userColumns]._repr_html_().replace("Table", "ImageCollection")
+
+    def __getitem__(self, key):
+        if isinstance(key, (int, str, np.integer)):
+            return self.data[self._userColumns][key]
+        elif isinstance(key, (list, np.ndarray, slice)):
+            return self.__class__(self.data[key], standardizers=self._standardizers[key])
+        else:
+            return self.data[key]
+
+    def __setitem__(self, key, val):
+        self.data[key] = val
+
+    def __len__(self):
+        return len(self.data)
+
+    def __eq__(self, other):
+        # start with cheap comparisons
+        if not isinstance(other, ImageCollection):
+            return False
+
+        if not self.meta == other.meta:
+            return False
+
+        if not self.columns.keys() == other.columns.keys():
+            return False
+
+        # before we compare the entire tables (minus WCS, not comparable)
+        cols = [col for col in self.columns if col != "wcs"]
+        return (self.data[cols] == other.data[cols]).all()
+
+    @property
+    def meta(self):
+        return self.data.meta
+
+    @property
+    def wcs(self):
+        return self.data["wcs"].data
+
+    @property
+    def bbox(self):
+        return self.data["bbox"].data
+
+    @property
+    def columns(self):
+        """Return metadata columns."""
+        return self.data[self._userColumns].columns
+
+    @property
+    def standardizers(self, **kwargs):
+        """Standardizer generator."""
+        for i in range(len(self.data)):
+            yield self.get_standardizer(i)
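+
+    # A usage sketch (hypothetical collection `ic`): the generator above lets
+    # callers materialize standardizers one at a time, e.g.
+    #     for entry in ic.standardizers:
+    #         layered = entry["std"].toLayeredImage()
+    # without instantiating all of them up front.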
+    def get_standardizer(self, index, **kwargs):
+        """Get the standardizer and extension index for the selected row of
+        the unravelled metadata table.
+
+        A helper function that allows access to non-required common
+        properties such as the standardizer object, WCS, bounding boxes etc.
+
+        Parameters
+        ----------
+        index : `int`
+            Index, as it appears in the unravelled table of metadata
+            properties.
+        **kwargs : `dict`
+            Keyword arguments are passed onto the Standardizer constructor.
+
+        Returns
+        -------
+        std : `dict`
+            A dictionary containing the standardizer (``std``) and the
+            extension (``ext``) that maps to the given metadata row index.
+        """
+        row = self.data[index]
+        std_idx = row["std_idx"]
+        if self._standardizers[std_idx] is None:
+            std_cls = Standardizer.registry[row["std_name"]]
+            self._standardizers[std_idx] = std_cls(**kwargs, **row)
+
+        # maybe a clever dataclass to shortcut the idx lookups on the user end?
+        return {"std": self._standardizers[std_idx], "ext": self.data[index]["ext_idx"]}
+
+    def get_standardizers(self, idxs, **kwargs):
+        """Get the standardizers used to extract metadata of the selected
+        rows.
+
+        Parameters
+        ----------
+        idxs : `int` or `iterable`
+            Indices of the rows for which to retrieve the standardizers.
+        **kwargs : `dict`
+            Keyword arguments are passed onto the constructors of the
+            retrieved standardizers.
+
+        Returns
+        -------
+        std : `list`
+            A list of dictionaries containing the standardizer (``std``) and
+            the extension (``ext``) that maps to the given metadata row index.
+        """
+        if isinstance(idxs, int):
+            return [self.get_standardizer(idxs, **kwargs)]
+        else:
+            return [self.get_standardizer(idx, **kwargs) for idx in idxs]
+
+    ########################
+    # FUNCTIONALITY (object operations, transformative functionality)
+    ########################
+    def write(self, *args, format=None, serialize_method=None, **kwargs):
+        """Write the ImageCollection to a file or file-like object.
+
+        A light wrapper around the underlying AstroPy Table ``write``
+        functionality; see the `astropy.io.ascii` write documentation.
+        """
+        tmpdata = self.data.copy()
+
+        # a long history: https://github.com/astropy/astropy/issues/4669
+        # the short of it is that WCS will not round-trip itself the way we
+        # want, so we serialize the header ourselves
+        wcs_strs = []
+        for wcs in self.wcs:
+            header = wcs.to_header(relax=True)
+            # pixel_shape is in FITS axis order, i.e. (NAXIS1, NAXIS2)
+            w, h = wcs.pixel_shape
+            header["NAXIS1"] = (w, "width of the original image axis")
+            header["NAXIS2"] = (h, "height of the original image axis")
+            wcs_strs.append(header.tostring())
+        tmpdata["wcs"] = wcs_strs
+
+        bbox = [json.dumps(b) for b in self.bbox]
+        tmpdata["bbox"] = bbox
+
+        configs = [json.dumps(entry["std"].config.toDict()) for entry in self.standardizers]
+        # If we name this config then the unpacking operator in get_std will
+        # catch it. Otherwise, we need to explicitly handle it in read.
+        tmpdata["config"] = configs
+
+        # some formats do not officially support comments, like CSV, others
+        # have no problems with comments, and some provide a workaround, like
+        # dumping only the meta["comments"] section when a comment="#" kwarg
+        # is given. Because of these inconsistencies we just package
+        # everything into a "comments" tag and unpack it at read time.
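+        # Sketch of the round-trip (hypothetical meta): {"n_stds": 2} is
+        # stringified into meta["comments"][0] here, and read() later calls
+        # json.loads(metadata.meta["comments"][0]) to restore it.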
+        stringified = json.dumps(tmpdata.meta)
+        current_comments = tmpdata.meta.get("comments", None)
+        if current_comments is not None:
+            tmpdata.meta = {}
+        tmpdata.meta["comments"] = [stringified]
+
+        tmpdata.write(*args, format=format, serialize_method=serialize_method, **kwargs)
+
+    def get_zero_shifted_times(self):
+        """Returns a list of timestamps shifted such that the first image is
+        at time 0.
+
+        Returns
+        -------
+        List of floats
+            A list of zero-shifted times (JD or MJD).
+        """
+        # what's the deal here - are we required to be sorted?
+        return self.data["mjd"] - self.data["mjd"][0]
+
+    def get_duration(self):
+        """Returns the duration of the collection, i.e. the time elapsed
+        between the first and the last image.
+
+        Returns
+        -------
+        duration : `float`
+            Difference between the last and the first timestamp (JD or MJD).
+        """
+        # maybe timespan?
+        return self.data["mjd"][-1] - self.data["mjd"][0]
+
+    def toWorkUnit(self, config):
+        """Return an `~kbmod.WorkUnit` object for processing with
+        KBMOD.
+
+        Parameters
+        ----------
+        config : `~kbmod.SearchConfiguration`
+            Search configuration.
+
+        Returns
+        -------
+        work_unit : `~kbmod.WorkUnit`
+            A `~kbmod.WorkUnit` object for processing with KBMOD.
+        """
+        layeredImages = [img for std in self.standardizers for img in std["std"].toLayeredImage()]
+        imgstack = ImageStack(layeredImages)
+        if None not in self.wcs:
+            return WorkUnit(imgstack, config, per_image_wcs=self.wcs)
+        return WorkUnit(imgstack, config)
diff --git a/src/kbmod/standardizers/__init__.py b/src/kbmod/standardizers/__init__.py
new file mode 100644
index 00000000..db45873d
--- /dev/null
+++ b/src/kbmod/standardizers/__init__.py
@@ -0,0 +1,3 @@
+from .standardizer import *
+from .fits_standardizers import *
+from .butler_standardizer import *
diff --git a/src/kbmod/standardizers/butler_standardizer.py b/src/kbmod/standardizers/butler_standardizer.py
new file mode 100644
index 00000000..56e33a43
--- /dev/null
+++ b/src/kbmod/standardizers/butler_standardizer.py
@@ -0,0 +1,338 @@
+"""Class for standardizing Data Products of the Vera C. Rubin Science
+Pipelines via the Rubin Data Butler.
+"""
+
+import importlib
+import uuid
+
+import astropy.nddata as bitmask
+from astropy.wcs import WCS
+import numpy as np
+from scipy.signal import convolve2d
+
+from .standardizer import Standardizer, StandardizerConfig
+from kbmod.search import LayeredImage, RawImage, PSF
+
+
+__all__ = [
+    "ButlerStandardizer",
+    "ButlerStandardizerConfig",
+]
+
+
+def deferred_import(module, name=None):
+    """Imports a module/class/function/name as ``name`` into the global
+    modules. If the name already exists, does nothing.
+
+    Intended for importing a large module or library only when needed, so as
+    to avoid the cost of the import when the functionality depending on the
+    imported library is not being used.
+
+    Used to defer the import of the Vera C. Rubin middleware components until
+    `ButlerStandardizer` initialization, so that we may be able to import
+    KBMOD before the heat death of the universe.
+
+    Parameters
+    ----------
+    module : `str`
+        Fully qualified name of the module/submodule/class/function/name.
+    name : `str` or `None`, optional
+        Name the target is imported as.
+
+    Raises
+    ------
+    ImportError :
+        Target is not reachable. Ensure the package is
+        installed and visible in the environment.
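+
+    Examples
+    --------
+    A minimal sketch::
+
+        >>> deferred_import("lsst.daf.butler", "dafButler")  # doctest: +SKIP
+
+    after which ``dafButler`` is usable as if it had been imported at the top
+    of the module.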
+    """
+    if name is None:
+        name = module
+    if globals().get(name, False):
+        return
+    try:
+        globals()[name] = importlib.import_module(module)
+    except ImportError as e:
+        raise ImportError(f"Module {module} or name {name} not found.") from e
+
+
+class ButlerStandardizerConfig(StandardizerConfig):
+    do_mask = True
+    """Perform masking if ``True``, otherwise return an empty mask."""
+
+    do_bitmask = True
+    """Mask ``mask_flags`` from the mask plane in the FITS file."""
+
+    do_threshold = False
+    """Mask all pixels above the given count threshold."""
+
+    grow_mask = True
+    """Grow mask footprint by ``grow_kernel_shape``"""
+
+    brightness_threshold = 10
+    """Pixels with a value greater than this threshold will be masked."""
+
+    grow_kernel_shape = (10, 10)
+    """Size of the symmetric square kernel by which mask footprints will be
+    increased."""
+
+    mask_flags = ["BAD", "CLIPPED", "CR", "CROSSTALK", "EDGE", "NO_DATA", "SAT", "SENSOR_EDGE", "SUSPECT"]
+    """List of flags that will be masked."""
+
+    psf_std = 1
+    """Standard deviation of the Point Spread Function."""
+
+
+class ButlerStandardizer(Standardizer):
+    """Standardizer for Vera C. Rubin Data Products, namely the underlying
+    ``Exposure`` objects of ``calexp``, ``difim`` and various ``*coadd``
+    dataset types.
+
+    This standardizer will volunteer to process ``DatasetRef`` or
+    ``DatasetId`` objects. The Rubin Data Butler is expected to be provided
+    at instantiation time, in addition to the target we want to standardize.
+
+    Parameters
+    ----------
+    tgt : `lsst.daf.butler.core.DatasetId`, `lsst.daf.butler.core.DatasetRef` or `int`
+        Target to standardize.
+    butler : `lsst.daf.butler.Butler`
+        Vera C. Rubin Data Butler.
+    config : `StandardizerConfig`, `dict` or `None`, optional
+        Configuration key-values used when standardizing the file.
+
+    Attributes
+    ----------
+    butler : `lsst.daf.butler.Butler`
+        Vera C. Rubin Data Butler.
+    ref : `lsst.daf.butler.core.DatasetRef`
+        Dataset reference to the given target.
+    exp : `lsst.afw.image.exposure.Exposure`
+        The `Exposure` object targeted by the ``ref``.
+    processable : `list[lsst.afw.image.exposure.Exposure]`
+        Items marked as processable by the standardizer. See `Standardizer`.
+    """
+
+    name = "ButlerStandardizer"
+    priority = 2
+    configClass = ButlerStandardizerConfig
+
+    @classmethod
+    def resolveTarget(cls, tgt):
+        # DatasetId is a type alias for UUIDs, so it'll be like a large int,
+        # hex, or a string of int/hex etc. We try casting to UUID to check
+        # whether a string is compliant.
+        # https://github.com/lsst/daf_butler/blob/main/python/lsst/daf/butler/_dataset_ref.py#L265
+        if isinstance(tgt, uuid.UUID):
+            return True
+
+        if isinstance(tgt, str):
+            try:
+                uuid.UUID(tgt)
+            except ValueError:
+                return False
+            else:
+                return True
+
+        # kinda hacky, but I don't want to import the entire Stack before I
+        # absolutely know I need/have it.
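+        # Duck-type instead of importing lsst.daf.butler here: the string
+        # repr of a DatasetRef's type contains (roughly, assumed here)
+        # "lsst.daf.butler...datasetref", so a case-insensitive substring
+        # check is enough to volunteer.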
+        tgttype = str(type(tgt)).lower()
+        if "datasetref" in tgttype or "datasetid" in tgttype:
+            return True
+
+        return False
+
+    def __init__(self, id, butler, config=None, **kwargs):
+        super().__init__(butler.datastore.root, config=config)
+        self.butler = butler
+
+        deferred_import("lsst.daf.butler", "dafButler")
+
+        if isinstance(id, dafButler.DatasetRef):
+            ref = id
+        elif isinstance(id, dafButler.DatasetId):
+            ref = butler.registry.getDataset(id)
+        elif isinstance(id, (uuid.UUID, str)):
+            ref = butler.registry.getDataset(dafButler.DatasetId(id))
+        else:
+            raise TypeError(f"Expected DatasetRef, DatasetId or a unique integer ID, got {id} instead.")
+
+        self.ref = ref
+        self.exp = butler.get(ref, collections=[ref.run])
+        self.processable = [self.exp]
+        self._wcs = []
+        self._bbox = []
+
+    @property
+    def wcs(self):
+        if not self._wcs:
+            self._wcs = list(self.standardizeWCS())
+        return self._wcs
+
+    @property
+    def bbox(self):
+        if not self._bbox:
+            self._bbox = list(self.standardizeBBox())
+        return self._bbox
+
+    def _computeBBox(self, wcs, dimX, dimY):
+        """Given a WCS and the dimensions of an image, calculates the world
+        coordinates of the image corner and the image center.
+
+        Parameters
+        ----------
+        wcs : `object`
+            The header, Astropy HDU and its derivatives.
+        dimX : `int`
+            Image dimension in the x-axis.
+        dimY : `int`
+            Image dimension in the y-axis.
+
+        Returns
+        -------
+        standardizedBBox : `dict`
+            Calculated coordinate values, a dict with ``center_[ra, dec]``
+            and ``corner_[ra, dec]`` keys.
+
+        Notes
+        -----
+        The center point is assumed to be at the (dimX/2, dimY/2) pixel
+        coordinates, rounded down. The corner is taken to be the (0,0)-th
+        pixel.
+        """
+        standardizedBBox = {}
+        centerX, centerY = int(dimX / 2), int(dimY / 2)
+
+        centerSkyCoord = wcs.pixel_to_world(centerX, centerY)
+        cornerSkyCoord = wcs.pixel_to_world(0, 0)
+
+        standardizedBBox["center_ra"] = centerSkyCoord.ra.deg
+        standardizedBBox["center_dec"] = centerSkyCoord.dec.deg
+
+        standardizedBBox["corner_ra"] = cornerSkyCoord.ra.deg
+        standardizedBBox["corner_dec"] = cornerSkyCoord.dec.deg
+
+        return standardizedBBox
+
+    def standardizeMetadata(self):
+        metadata = {}
+        metadata["location"] = self.butler.getURI(self.ref, collections=[self.ref.run]).geturl()
+        metadata.update({"wcs": self.wcs, "bbox": self.bbox})
+
+        metadata["mjd"] = self.exp.visitInfo.date.toAstropy().mjd
+        metadata["filter"] = self.exp.info.getFilter().physicalLabel
+        metadata["id"] = str(self.ref.id)
+        metadata["exp_id"] = self.exp.info.id
+
+        # Potentially over-engineered; when will bbox be there if wcs isn't,
+        # and what happens if the WCS fails to construct?
+        if "ra" not in metadata or "dec" not in metadata:
+            # delete both so they stay consistent
+            metadata.pop("ra", None)
+            metadata.pop("dec", None)
+            if all(self.bbox):
+                metadata["ra"] = [bb["center_ra"] for bb in self.bbox]
+                metadata["dec"] = [bb["center_dec"] for bb in self.bbox]
+            elif all(self.wcs):
+                dimx, dimy = self.exp.getWidth(), self.exp.getHeight()
+                centerSkyCoord = self.wcs[0].pixel_to_world(dimx / 2, dimy / 2)
+                metadata["ra"] = centerSkyCoord.ra.deg
+                metadata["dec"] = centerSkyCoord.dec.deg
+
+        return metadata
+
+    # These were expected to be generators but, because we already evaluated
+    # the entire exposure, the cost of disk I/O is already paid and they
+    # simply return lists.
+    # TODO: Add lazy eval by punting metadata standardization to the
+    # visit_info table in the registry, and thus optimize the building of an
+    # ImageCollection.
+    def standardizeScienceImage(self):
+        return [self.exp.image.array]
+
+    def standardizeVarianceImage(self):
+        return [self.exp.variance.array]
+
+    def standardizeMaskImage(self):
+        # Return an empty mask if no masking is done
+        if not self.config["do_mask"]:
+            return [np.zeros_like(self.exp.image.array)]
+
+        # Otherwise load the mask extension and process it
+        mask = self.exp.mask.array.astype(int)
+
+        if self.config["do_bitmask"]:
+            # flip_bits makes ignore_flags into mask_these_flags
+            bit_flag_map = self.exp.mask.getMaskPlaneDict()
+            bit_flag_map = {key: int(2**val) for key, val in bit_flag_map.items()}
+            mask = bitmask.bitfield_to_boolean_mask(
+                bitfield=mask,
+                ignore_flags=self.config["mask_flags"],
+                flag_name_map=bit_flag_map,
+                flip_bits=True,
+            )
+
+        if self.config["do_threshold"]:
+            bmask = self.exp.image.array > self.config["brightness_threshold"]
+            mask = mask | bmask
+
+        if self.config["grow_mask"]:
+            grow_kernel = np.ones(self.config["grow_kernel_shape"])
+            mask = convolve2d(mask, grow_kernel, mode="same").astype(bool)
+
+        return [mask]
+
+    def standardizePSF(self):
+        # TODO: Update when we formalize the PSF. Any of these are available
+        # from the stack:
+        #     self.exp.psf.computeImage
+        #     self.exp.psf.computeKernelImage
+        #     self.exp.psf.getKernel
+        std = self.config["psf_std"]
+        return [PSF(std)]
+
+    def standardizeWCS(self):
+        return [WCS(self.exp.wcs.getFitsMetadata()) if self.exp.hasWcs() else None]
+
+    def standardizeBBox(self):
+        if self.exp.hasWcs():
+            dimx, dimy = self.exp.getWidth(), self.exp.getHeight()
+            return [self._computeBBox(self.wcs[0], dimx, dimy)]
+        # keep the return type consistent so `all(self.bbox)` keeps working
+        return [None]
+
+    def toLayeredImage(self):
+        meta = self.standardizeMetadata()
+        sciences = self.standardizeScienceImage()
+        variances = self.standardizeVarianceImage()
+        masks = self.standardizeMaskImage()
+        psfs = self.standardizePSF()
+
+        # guaranteed to exist, i.e. safe to access; a scalar here (a single
+        # Exposure), so repeat it per processable item for the zip below
+        mjds = [meta["mjd"]] * len(sciences)
+        imgs = []
+        for sci, var, mask, psf, t in zip(sciences, variances, masks, psfs, mjds):
+            imgs.append(LayeredImage(RawImage(sci), RawImage(var), RawImage(mask), t, psf))
+        return imgs
diff --git a/src/kbmod/standardizers/fits_standardizers/__init__.py b/src/kbmod/standardizers/fits_standardizers/__init__.py
new file mode 100644
index 00000000..a0e43560
--- /dev/null
+++ b/src/kbmod/standardizers/fits_standardizers/__init__.py
@@ -0,0 +1,4 @@
+from .kbmodv1 import *
+from .single_extension_fits import *
+from .multi_extension_fits import *
+from .fits_standardizer import *
diff --git a/src/kbmod/standardizers/fits_standardizers/fits_standardizer.py b/src/kbmod/standardizers/fits_standardizers/fits_standardizer.py
new file mode 100644
index 00000000..a83c82f7
--- /dev/null
+++ b/src/kbmod/standardizers/fits_standardizers/fits_standardizer.py
@@ -0,0 +1,443 @@
+"""`FitsStandardizer` standardizes local, file-system FITS files.
+
+Prefer to use specific FITS standardizers that specify the type of the FITS
+file, such as `SingleExtensionFits` or `MultiExtensionFits`, whenever
+possible.
+
+`FitsStandardizer` is primarily useful for handling shared functionality and
+simplifying further processing, so there is not much to gain by using it
+directly.
+"""
+
+from os.path import isfile
+from pathlib import Path
+
+from astropy.utils import isiterable
+import astropy.io.fits as fits
+from astropy.wcs import WCS
+import numpy as np
+
+from ..standardizer import Standardizer, StandardizerConfig, ConfigurationError
+from kbmod.search import LayeredImage, RawImage, PSF
+
+
+__all__ = [
+    "FitsStandardizer",
+    "FitsStandardizerConfig",
+]
+
+
+class FitsStandardizerConfig(StandardizerConfig):
+    psf_std = 1
+    """Standard deviation of the Point Spread Function.
+    When a ``float``, a uniform STD is applied to all processable items in
+    the standardizer. When a list, it must be of equal length as the number
+    of processable items; an index-matched STD per processable item.
+    """
+
+
+class FitsStandardizer(Standardizer):
+    """Supports processing of a single FITS file.
+
+    This is a `Standardizer` stub and can not be used directly. Its intended
+    use is to facilitate implementing new standardizers. If you are
+    implementing a `FitsStandardizer`, consider inheriting from
+    `SingleExtensionFits` or `MultiExtensionFits`.
+
+    Standardizers inheriting from `FitsStandardizer` require that the FITS
+    file is readable by AstroPy.
+
+    Parameters
+    ----------
+    location : `str` or `None`, optional
+        Location of the file (if any) that is standardized. Required if
+        ``hdulist`` is not provided.
+    hdulist : `astropy.io.fits.HDUList` or `None`, optional
+        HDUList to standardize. Required if ``location`` is not provided.
+        If provided, and ``location`` is not given, defaults to ``:memory:``.
+    config : `StandardizerConfig` or `dict`, optional
+        Configuration key-values used when standardizing the file. When not
+        provided, uses `configClass` to determine the default configuration
+        class.
+
+    Attributes
+    ----------
+    hdulist : `~astropy.io.fits.HDUList`
+        All HDUs found in the FITS file.
+    primary : `~astropy.io.fits.Header`
+        Header of the primary HDU.
+    processable : `list`
+        Any additional extensions marked by the standardizer for further
+        processing. Does not include the primary HDU if it doesn't contain
+        any image data. Contains at least 1 entry.
+    """
+
+    # Standardizers that we don't want to register themselves are left
+    # nameless. Since FitsStandardizer isn't usable by itself, we do not
+    # register it.
+    # name = "FitsStandardizer"
+    # priority = 0
+    configClass = FitsStandardizerConfig
+    """The default standardizer configuration."""
+
+    valid_extensions = [".fit", ".fits", ".fits.fz"]
+    """File extensions this processor can handle."""
+
+    @classmethod
+    def resolveFromPath(cls, tgt):
+        """Successfully resolves FITS files on a local file system that are
+        readable by AstroPy.
+
+        Parameters
+        ----------
+        tgt : str
+            Path to a FITS file.
+
+        Returns
+        -------
+        canStandardize : `bool`
+            `True` if the target is a FITS file readable by AstroPy,
+            `False` otherwise.
+        resources : `dict`
+            An empty dict when ``canStandardize`` is `False`; otherwise the
+            ``"hdulist"`` key contains the constructed `~fits.HDUList`
+            object.
+
+        Raises
+        ------
+        FileNotFoundError :
+            when the file doesn't exist.
+        """
+        # nasty hack, should handle extensions better
+        fname = Path(tgt)
+        extensions = fname.suffixes
+
+        # if the extensions are empty, we don't think it's a FITS file
+        if not extensions:
+            return False, {}
+
+        if extensions[-1] not in cls.valid_extensions:
+            return False, {}
+
+        try:
+            hdulist = fits.open(tgt)
+        except OSError:
+            # OSError - file isn't a FITS file
+            # FileNotFoundError - bad file, let it raise
+            return False, {}
+
+        return True, {"hdulist": hdulist}
+
+    @classmethod
+    def resolveTarget(cls, tgt):
+        """Returns `True` if the target is a FITS file on a local filesystem
+        that can be read by AstroPy's FITS module, or an
+        `~astropy.io.fits.HDUList`.
+
+        Parameters
+        ----------
+        tgt : str
+            Path to a FITS file.
+
+        Returns
+        -------
+        canStandardize : `bool`
+            `True` if the target is a FITS file readable by AstroPy,
+            `False` otherwise.
+        resources : `dict`, optional
+            An empty dictionary when ``canStandardize`` is `False`. A dict
+            containing the `~fits.HDUList` when ``canStandardize`` is `True`.
+
+        Raises
+        ------
+        FileNotFoundError :
+            when the target is a path to a file that doesn't exist.
+        TypeError :
+            when the target is not an HDUList or a filepath.
+        """
+        if isinstance(tgt, str):
+            return cls.resolveFromPath(tgt)
+        elif isinstance(tgt, fits.HDUList):
+            return True, {"hdulist": tgt}
+        return False, {}
+
+    def __init__(self, location=None, hdulist=None, config=None, **kwargs):
+        # TODO: oh no, ImageCollection needs to round-trip from kwargs only.
+        # This means either having a "tgt" column (I'm not sure that is such a
+        # bad idea) or having STD inits that support different data sources as
+        # kwargs, but understand that sometimes the kwargs can be flipped,
+        # since Standardizer.get can be used without kwargs. I need to improve
+        # on this.
+        if isinstance(location, fits.HDUList):
+            hdulist = location
+            location = None
+
+        # Failure modes are:
+        # - if we have neither a location nor an HDUList, we complain
+        # - if location is not a file and we have no HDUList, we complain
+        if location is None and hdulist is None:
+            raise ValueError("Expected location or HDUList, got neither.")
+
+        if hdulist is None and (location == ":memory:" or not isfile(location)):
+            raise FileNotFoundError("Given location is not a file, but no hdulist is given.")
+
+        # Otherwise it's pretty normal:
+        # - if location is ok and no HDUList exists, read the location
+        # - if an HDUList exists and a location doesn't, try to get the
+        #   location from it, use :memory: otherwise
+        # - if both hdulist and location exist - nothing to do.
+        # The object will attempt to close the hdulist when it gets GC'd
+        if location is not None and hdulist is None:
+            hdulist = fits.open(location)
+        elif location is None and hdulist is not None:
+            location = ":memory:" if hdulist.filename() is None else hdulist.filename()
+
+        super().__init__(location, config=config, **kwargs)
+        self.hdulist = hdulist
+        self.primary = self.hdulist["PRIMARY"].header
+        self.isMultiExt = len(self.hdulist) > 1
+
+        self.processable = []
+        self._wcs = []
+        self._bbox = []
+
+    def __del__(self):
+        # Try to close if there's anything to close. Python does not
+        # guarantee that this method is called, or in what order it's called,
+        # so various things may already be GC'd and the attempt can fail.
+        # There's nothing else we can do at that point.
+        hdulist = getattr(self, "hdulist", None)
+        if hdulist is not None:
+            try:
+                hdulist.close(output_verify="ignore", verbose=False)
+            except Exception:
+                pass
+
+    def close(self, output_verify="exception", verbose=False, closed=True):
+        """Close the associated FITS file and memmap object, if any.
+
+        See `astropy.io.fits.HDUList.close`.
+
+        Parameters
+        ----------
+        output_verify : `str`
+            Output verification option. Must be one of ``"fix"``,
+            ``"silentfix"``, ``"ignore"``, ``"warn"``, or ``"exception"``. May
+            also be any combination of ``"fix"`` or ``"silentfix"`` with
+            ``"+ignore"``, ``"+warn"``, or ``"+exception"`` (e.g.
+            ``"fix+warn"``). See Astropy Verification Options for more info.
+        verbose : bool
+            When `True`, print out verbose messages.
+        closed : bool
+            When `True`, close the underlying file object.
+        """
+        self.hdulist.close(output_verify=output_verify, verbose=verbose, closed=closed)
+
+    @property
+    def wcs(self):
+        if not self._wcs:
+            self._wcs = list(self.standardizeWCS())
+        return self._wcs
+
+    @property
+    def bbox(self):
+        if not self._bbox:
+            self._bbox = list(self.standardizeBBox())
+        return self._bbox
+
+    def _computeBBox(self, wcs, dimX, dimY):
+        """Given a WCS and the dimensions of an image, calculates the world
+        coordinates of the image corner and the image center.
+
+        Parameters
+        ----------
+        wcs : `object`
+            The header, Astropy HDU and its derivatives.
+        dimX : `int`
+            Image dimension in the x-axis.
+        dimY : `int`
+            Image dimension in the y-axis.
+
+        Returns
+        -------
+        standardizedBBox : `dict`
+            Calculated coordinate values, a dict with ``center_[ra, dec]``
+            and ``corner_[ra, dec]`` keys.
+
+        Notes
+        -----
+        The center point is assumed to be at the (dimX/2, dimY/2) pixel
+        coordinates, rounded down. The corner is taken to be the (0,0)-th
+        pixel.
+        """
+        standardizedBBox = {}
+        centerX, centerY = int(dimX / 2), int(dimY / 2)
+
+        centerSkyCoord = wcs.pixel_to_world(centerX, centerY)
+        cornerSkyCoord = wcs.pixel_to_world(0, 0)
+
+        standardizedBBox["center_ra"] = centerSkyCoord.ra.deg
+        standardizedBBox["center_dec"] = centerSkyCoord.dec.deg
+
+        standardizedBBox["corner_ra"] = cornerSkyCoord.ra.deg
+        standardizedBBox["corner_dec"] = cornerSkyCoord.dec.deg
+
+        return standardizedBBox
+
+    def _bestGuessImageDimensions(self):
+        """Makes a best guess at the dimensions of the extensions registered
+        as processable.
+
+        The best-guess methodology assumes that image sizes are encoded in
+        the header NAXIS keywords for each extension registered as
+        processable. If they are not, the image size is set to the best-guess
+        image size.
+
+        The best-guess image size is determined, in order of operations, by:
+
+        1) assuming the primary header encodes the image size in the
+           ``NAXIS1`` and ``NAXIS2`` keywords,
+        2) if the NAXIS keywords are not found, by attempting to read them
+           from the header of the first registered processable image,
+        3) and if they are still not found, by loading the first processable
+           extension's data and using its shape to determine the dimensions.
+
+        Thus the sizes of all extensions that do not have NAXIS keywords will
+        default to whatever the primary header NAXIS keywords state, the
+        first registered extension's NAXIS keywords state, or the dimensions
+        of the data in the first registered extension.
+
+        This usually reflects reality, because focal planes are rarely tiled
+        with CCDs of varying sizes, and the registered extensions are
+        precisely the images created by the science CCDs.
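+
+        For example (hypothetical headers): if neither the primary header nor
+        the first extension carries NAXIS keys, but the first extension's
+        data is a (2048, 4096) array, every extension without NAXIS keys is
+        assumed to share those dimensions.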
+
+        The method performs better than determining the exact size for each
+        extension from its data attribute, as it doesn't load the actual
+        data into memory each time.
+
+        Returns
+        -------
+        sizes : `list`
+            List of tuples ``(dimx, dimy)`` of the best-guess sizes of the
+            extensions.
+        """
+        # We perhaps do more work than necessary here, but I don't have a
+        # good example of a FITS file without NAXIS1 and NAXIS2 keys in at
+        # least one of the headers.
+        guessNaxis1 = self.primary.get("NAXIS1", None)
+        guessNaxis2 = self.primary.get("NAXIS2", None)
+
+        # NAXIS kwargs don't exist in the primary header
+        if guessNaxis1 is None or guessNaxis2 is None:
+            guessNaxis1 = self.processable[0].header.get("NAXIS1", None)
+            guessNaxis2 = self.processable[0].header.get("NAXIS2", None)
+
+        # NAXIS kwargs don't exist in the primary or the first extension
+        if guessNaxis1 is None or guessNaxis2 is None:
+            guessNaxis1, guessNaxis2 = self.processable[0].data.shape
+
+        return (
+            (e.header.get("NAXIS1", None) or guessNaxis1, e.header.get("NAXIS2", None) or guessNaxis2)
+            for e in self.processable
+        )
+
+    def standardizeWCS(self):
+        # we should have checks here that the constructed WCS isn't the
+        # default empty WCS, which can happen. It is quite annoying that the
+        # error is silent.
+        return (WCS(ext.header) for ext in self.processable)
+
+    def standardizeBBox(self):
+        sizes = self._bestGuessImageDimensions()
+        return (self._computeBBox(wcs, size[0], size[1]) for wcs, size in zip(self.wcs, sizes))
+
+    def translateHeader(self, *args, **kwargs):
+        """Maps the header keywords to the required and optional metadata.
+
+        Is required to return a dictionary containing at least the following
+        keys and values:
+
+        ======== ==============================================================
+        Key      Description
+        ======== ==============================================================
+        mjd      Decimal MJD timestamp of the start of the observation.
+        ra       Right Ascension, in the ICRS coordinate system, of the
+                 extracted or calculated on-sky position of the central
+                 pixel, or of the pointing data.
+        dec      Declination, in the ICRS coordinate system, expressed in
+                 decimal degrees, of the extracted or calculated on-sky
+                 position of the data.
+        ======== ==============================================================
+
+        Returns
+        -------
+        metadata : `dict`
+            Required and optional metadata.
+        """
+        raise NotImplementedError("This FitsStandardizer doesn't implement a header standardizer.")
+
+    def standardizeMetadata(self):
+        metadata = self.translateHeader()
+        metadata["location"] = self.location
+        metadata.update({"wcs": self.wcs, "bbox": self.bbox})
+
+        # calculate the pointing from the bbox or wcs if they exist?
+        # I feel like I've over-engineered this. When will bbox ever not be
+        # there if wcs is? What happens if the WCS fails to construct?
+        if "ra" not in metadata or "dec" not in metadata:
+            # delete both so they stay consistent
+            metadata.pop("ra", None)
+            metadata.pop("dec", None)
+            if all(self.bbox):
+                metadata["ra"] = [bb["center_ra"] for bb in self.bbox]
+                metadata["dec"] = [bb["center_dec"] for bb in self.bbox]
+            elif all(self.wcs):
+                # this is almost a copy of the bbox code
+                sizes = self._bestGuessImageDimensions()
+                metadata["ra"], metadata["dec"] = [], []
+                for wcs, (dimx, dimy) in zip(self.wcs, sizes):
+                    centerSkyCoord = wcs.pixel_to_world(dimx / 2, dimy / 2)
+                    metadata["ra"].append(centerSkyCoord.ra.deg)
+                    metadata["dec"].append(centerSkyCoord.dec.deg)
+
+        return metadata
+
+    def standardizeScienceImage(self):
+        # the assumption here is that all exts are AstroPy HDU objects
+        return (ext.data for ext in self.processable)
+
+    def standardizePSF(self):
+        stds = self.config["psf_std"]
+        if isiterable(stds):
+            if len(stds) != len(self.processable):
+                raise ConfigurationError(
+                    "Number of PSF STDs does not match the "
+                    "declared number of processable units "
+                    "requiring a PSF instance."
+                )
+            return (PSF(std) for std in stds)
+        elif isinstance(stds, (int, float)):
+            return (PSF(stds) for i in self.processable)
+        else:
+            raise TypeError(f"Expected a number or a list, got {type(stds)}: {stds}")
+
+    def toLayeredImage(self):
+        """Returns a list of `~kbmod.search.layered_image` objects for each
+        entry marked as processable.
+
+        Returns
+        -------
+        layeredImage : `list`
+            Layered image objects.
+        """
+        meta = self.standardizeMetadata()
+        sciences = self.standardizeScienceImage()
+        variances = self.standardizeVarianceImage()
+        masks = self.standardizeMaskImage()
+
+        psfs = self.standardizePSF()
+
+        # guaranteed to exist, i.e. safe to access, but isn't unravelled here
+        # (or potentially it is - we can't tell?)
+        if isinstance(meta["mjd"], (list, tuple)):
+            mjds = meta["mjd"]
+        else:
+            mjds = (meta["mjd"] for e in self.processable)
+
+        # Sci and var will, potentially, already be loaded by AstroPy as
+        # float32 arrays. It depends on the header keys really, but that's
+        # the Standardizer's job. Lack of generics in the CPP code forces
+        # casting of the mask, making a copy. TODO: fix when/if the CPP stuff
+        # is fixed.
+        imgs = []
+        for sci, var, mask, psf, t in zip(sciences, variances, masks, psfs, mjds):
+            imgs.append(LayeredImage(RawImage(sci), RawImage(var), RawImage(mask.astype(np.float32)), t, psf))
+
+        return imgs
diff --git a/src/kbmod/standardizers/fits_standardizers/kbmodv1.py b/src/kbmod/standardizers/fits_standardizers/kbmodv1.py
new file mode 100644
index 00000000..3f2cd1c8
--- /dev/null
+++ b/src/kbmod/standardizers/fits_standardizers/kbmodv1.py
@@ -0,0 +1,204 @@
+"""Class for standardizing FITS files produced by the Vera C. Rubin
+Science Pipelines as they were specified during the KBMOD V1.0 development.
+
+There are no guarantees that current Rubin Science Pipelines Data Products
+can still be standardized with this class. To ensure correct behavior for
+any version of the Rubin Stack, use the `ButlerStandardizer`.
+"""
+
+from astropy.time import Time
+from astropy.nddata import bitmask
+
+import numpy as np
+from scipy.signal import convolve2d
+
+from .multi_extension_fits import MultiExtensionFits, FitsStandardizerConfig
+
+
+__all__ = [
+    "KBMODV1",
+    "KBMODV1Config",
+]
+
+
+class KBMODV1Config(FitsStandardizerConfig):
+    do_mask = True
+    """Perform masking if ``True``, otherwise return an empty mask."""
+
+    do_bitmask = True
+    """Mask ``mask_flags`` from the mask plane in the FITS file."""
+
+    do_threshold = False
+    """Mask all pixels above the given count threshold."""
+
+    grow_mask = True
+    """Grow mask footprint by ``grow_kernel_shape``"""
+
+    brightness_threshold = 10
+    """Pixels with a value greater than this threshold will be masked."""
+
+    grow_kernel_shape = (10, 10)
+    """Size of the symmetric square kernel by which mask footprints will be
+    increased."""
+
+    bit_flag_map = {
+        "BAD": 2**0,
+        "CLIPPED": 2**9,
+        "CR": 2**3,
+        "CROSSTALK": 2**10,
+        "DETECTED": 2**5,
+        "DETECTED_NEGATIVE": 2**6,
+        "EDGE": 2**4,
+        "INEXACT_PSF": 2**11,
+        "INTRP": 2**2,
+        "NOT_DEBLENDED": 2**12,
+        "NO_DATA": 2**8,
+        "REJECTED": 2**13,
+        "SAT": 2**1,
+        "SENSOR_EDGE": 2**14,
+        "SUSPECT": 2**7,
+        "UNMASKEDNAN": 2**15,
+    }
+    """Mapping between the flag meaning and its value."""
+
+    mask_flags = ["BAD", "EDGE", "NO_DATA", "SUSPECT", "UNMASKEDNAN"]
+    """List of flags that will be used when masking."""
+
+
+class KBMODV1(MultiExtensionFits):
+    """Standardizer for Vera C. Rubin Science Pipelines imdiff Data Products,
+    as they were produced by the Science Pipelines and the procedure
+    described in arXiv:2109.03296 and arXiv:2310.03678.
+
+    This standardizer exists for backward compatibility purposes. Its use is
+    not recommended. Use `ButlerStandardizer` instead.
+
+    This standardizer will volunteer to process FITS files whose primary
+    header contains the ``"ZTENSION"``, ``"ZPCOUNT"``, ``"ZGCOUNT"`` and
+    ``"CCDNUM"`` keywords.
+
+    Parameters
+    ----------
+    location : `str` or `None`, optional
+        Location of the file (if any) that is standardized. Required if
+        ``hdulist`` is not provided.
+    hdulist : `astropy.io.fits.HDUList` or `None`, optional
+        HDUList to standardize. Required if ``location`` is not provided.
+        If provided, and ``location`` is not given, defaults to ``:memory:``.
+    config : `StandardizerConfig`, `dict` or `None`, optional
+        Configuration key-values used when standardizing the file.
+
+    Attributes
+    ----------
+    hdulist : `~astropy.io.fits.HDUList`
+        All HDUs found in the FITS file.
+    primary : `~astropy.io.fits.PrimaryHDU`
+        The primary HDU.
+    processable : `list`
+        Any additional extensions marked by the standardizer for further
+        processing. Does not include the primary HDU if it doesn't contain
+        any image data. Contains at least 1 entry.
+    wcs : `list`
+        WCSs associated with the processable image data. Will contain
+        at least 1 WCS.
+    bbox : `list`
+        Bounding boxes associated with each WCS.
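+
+    Examples
+    --------
+    A minimal sketch (hypothetical file name)::
+
+        >>> std = KBMODV1("decam_imdiff.fits")  # doctest: +SKIP
+        >>> imgs = std.toLayeredImage()  # doctest: +SKIP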
+    """
+
+    name = "KBMODV1"
+    priority = 2
+    configClass = KBMODV1Config
+
+    @classmethod
+    def resolveTarget(cls, tgt):
+        parentCanStandardize, resources = super().resolveTarget(tgt)
+
+        if not parentCanStandardize:
+            return False, {}
+
+        # A rough best guess here; I'm certain we can find a Rubin
+        # signature somewhere in the header that's a clearer signal
+        # that this is a Rubin Sci Pipe product.
+        hdulist = resources["hdulist"]
+        primary = hdulist["PRIMARY"].header
+        isRubin = all(
+            ("ZTENSION" in primary, "ZPCOUNT" in primary, "ZGCOUNT" in primary, "CCDNUM" in primary)
+        )
+
+        canStandardize = parentCanStandardize and isRubin
+        return canStandardize, resources
+
+    def __init__(self, location=None, hdulist=None, config=None, **kwargs):
+        super().__init__(location=location, hdulist=hdulist, config=config, **kwargs)
+
+        # this is the only science-image header for Rubin
+        self.processable = [self.hdulist["IMAGE"]]
+
+    def translateHeader(self):
+        """Returns the following metadata, read from the primary header, as a
+        dictionary:
+
+        ======== ========== ===================================================
+        Key      Header Key Description
+        ======== ========== ===================================================
+        mjd      DATE-AVG   Decimal MJD timestamp of the middle of the exposure
+        filter   FILTER     Filter band
+        visit_id IDNUM      Visit ID
+        observat OBSERVAT   Observatory name
+        obs_lat  OBS-LAT    Observatory latitude
+        obs_lon  OBS-LONG   Observatory longitude
+        obs_elev OBS-ELEV   Observatory elevation
+        ======== ========== ===================================================
+        """
+        # this is the 1 mandatory piece of metadata we need to extract
+        standardizedHeader = {}
+        obs_datetime = Time(self.primary["DATE-AVG"], format="isot")
+        standardizedHeader["mjd"] = obs_datetime.mjd
+
+        # these are all optional things
+        standardizedHeader["filter"] = self.primary["FILTER"]
+        standardizedHeader["visit_id"] = self.primary["IDNUM"]
+        standardizedHeader["observat"] = self.primary["OBSERVAT"]
+        standardizedHeader["obs_lat"] = self.primary["OBS-LAT"]
+        standardizedHeader["obs_lon"] = self.primary["OBS-LONG"]
+        standardizedHeader["obs_elev"] = self.primary["OBS-ELEV"]
+
+        return standardizedHeader
+
+    def _standardizeMask(self):
+        # Return empty masks if no masking is done
+        if not self.config["do_mask"]:
+            sizes = self._bestGuessImageDimensions()
+            return (np.zeros(size) for size in sizes)
+
+        # Otherwise load the mask extension and process it
+        mask = self.hdulist["MASK"].data
+
+        if self.config["do_bitmask"]:
+            # flip_bits makes ignore_flags into mask_these_flags
+            mask = bitmask.bitfield_to_boolean_mask(
+                bitfield=mask,
+                ignore_flags=self.config["mask_flags"],
+                flag_name_map=self.config["bit_flag_map"],
+                flip_bits=True,
+            )
+
+        if self.config["do_threshold"]:
+            bmask = self.processable[0].data > self.config["brightness_threshold"]
+            mask = mask | bmask
+
+        if self.config["grow_mask"]:
+            grow_kernel = np.ones(self.config["grow_kernel_shape"])
+            mask = convolve2d(mask, grow_kernel, mode="same").astype(bool)
+
+        return mask
+
+    # hmm, making these generators made sense when thinking about
+    # ImageCollection; it makes this kind of awkward now. We could yield
+    # from _standardizeMask, but then we need to raise StopIteration.
+    def standardizeMaskImage(self):
+        return (self._standardizeMask() for i in self.processable)
+
+    def standardizeVarianceImage(self):
+        return (self.hdulist["VARIANCE"].data for i in self.processable)
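An end-to-end sketch of how the standardizers above compose with
`ImageCollection` (hypothetical paths; the `SearchConfiguration` instance
`config` is assumed to already exist):

    from kbmod import ImageCollection

    ic = ImageCollection.fromDir("/data/imdiffs/", recursive=True)  # KBMODV1 volunteers per file
    ic.write("night1.ecsv", format="ascii.ecsv")  # serializes WCS/bbox/config, see write()
    wu = ic.toWorkUnit(config)  # ready for a KBMOD search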
diff --git a/src/kbmod/standardizers/fits_standardizers/multi_extension_fits.py b/src/kbmod/standardizers/fits_standardizers/multi_extension_fits.py
new file mode 100644
index 00000000..582c6bd3
--- /dev/null
+++ b/src/kbmod/standardizers/fits_standardizers/multi_extension_fits.py
@@ -0,0 +1,111 @@
+"""Standardizer for FITS files containing multiple extensions."""
+
+from astropy.io.fits import CompImageHDU, PrimaryHDU, ImageHDU
+
+from .fits_standardizer import FitsStandardizer, FitsStandardizerConfig
+
+
+__all__ = [
+    "MultiExtensionFits",
+]
+
+
+class MultiExtensionFits(FitsStandardizer):
+    """Supports processing of a single, multi-extension FITS file.
+
+    Extensions for which it's possible to extract WCS, bounding boxes and
+    masks are required to be placed in the ``exts`` attribute. For single-
+    extension FITS files this is the primary header, as it contains the
+    image data.
+
+    Parameters
+    ----------
+    location : `str`
+        Location of the FITS file; can be an URI or a local filesystem path.
+    set_exts : `bool`
+        When `True`, finds all HDUs that are image-like and sets them as
+        elements of the `exts` list. Note that using the default
+        `_isImageLikeHDU` implementation is rather costly, as it loads the
+        whole data into memory.
+
+    Attributes
+    ----------
+    hdulist : `~astropy.io.fits.HDUList`
+        All HDUs found in the FITS file.
+    primary : `~astropy.io.fits.PrimaryHDU`
+        The primary HDU.
+    exts : `list`
+        All HDUs from ``hdulist`` marked as "image-like" for further
+        processing with KBMOD. Does not include the primary HDU when it
+        doesn't contain any image data. Contains at least 1 entry.
+    wcs : `list`
+        WCSs associated with the processable image data. Will contain at
+        least 1 WCS.
+    bbox : `list`
+        Bounding boxes associated with each WCS.
+    """
+
+    # Standardizers that we don't want to register themselves are left
+    # nameless. Since this class isn't usable by itself, we do not
+    # register it.
+    # name = "MultiExtensionFitsStandardizer"
+    # priority = 1
+    configClass = FitsStandardizerConfig
+
+    @staticmethod
+    def _isImageLikeHDU(hdu):
+        """Returns `True` if the given HDU contains an image, `False`
+        otherwise.
+
+        An HDU is determined to be an image if it's one of the primary,
+        image or compressed image types in Astropy and its ``.data``
+        attribute is not empty, but a 2D array with dimensions less than
+        6000 rows and columns.
+
+        This is a generic best-guess implementation and there are no
+        guarantees that the retrieved extensions are science images, i.e.
+        images containing the actual sky, and not small table-like HDUs,
+        guider or focus chip images.
+
+        Parameters
+        ----------
+        hdu : `astropy.fits.HDU`
+            Header unit to inspect.
+
+        Returns
+        -------
+        image_like : `bool`
+            `True` if the HDU is image-like, `False` otherwise.
+        """
+        # This is already a pretty good basic test
+        if not any((isinstance(hdu, CompImageHDU), isinstance(hdu, PrimaryHDU), isinstance(hdu, ImageHDU))):
+            return False
+
+        # The problem is that all kinds of things are stored as ImageHDUs,
+        # say a 120k x 8000k table (I'm looking at you, SDSS!). To avoid that
+        # we need to check the data shape.
+        # The problem is that this causes the data to be loaded from disk,
+        # and that is very expensive.
+        if len(hdu.shape) != 2:
+            return False
+
+        if hdu.shape[0] > 6000 or hdu.shape[1] > 6000:
+            return False
+
+        if hdu.data is None:
+            return False
+
+        return True
+
+    @classmethod
+    def resolveTarget(cls, tgt):
+        parentCanStandardize, res = super().resolveTarget(tgt)
+        if not parentCanStandardize:
+            return False, {}
+
+        canStandardize = parentCanStandardize and len(res["hdulist"]) > 1
+        return canStandardize, res
+
+    def __init__(self, location=None, hdulist=None, config=None, set_processable=False, **kwargs):
+        super().__init__(location=location, hdulist=hdulist, config=config, **kwargs)
+
+        # do not load images from disk unless requested
+        if set_processable:
+            for hdu in self.hdulist:
+                if self._isImageLikeHDU(hdu):
+                    self.processable.append(hdu)
diff --git a/src/kbmod/standardizers/fits_standardizers/single_extension_fits.py b/src/kbmod/standardizers/fits_standardizers/single_extension_fits.py
new file mode 100644
index 00000000..1073d5ae
--- /dev/null
+++ b/src/kbmod/standardizers/fits_standardizers/single_extension_fits.py
@@ -0,0 +1,52 @@
+from .fits_standardizer import FitsStandardizer, FitsStandardizerConfig
+
+
+class SingleExtensionFits(FitsStandardizer):
+    """Supports processing of a single, single-extension FITS file.
+
+    The PRIMARY HDU contains the data and is the sole element of ``exts``.
+    The WCS is extracted from the PRIMARY HDU as well.
+
+    Parameters
+    ----------
+    location : `str`
+        Location of the FITS file; can be an URI or a local filesystem path.
+
+    Attributes
+    ----------
+    hdulist : `~astropy.io.fits.HDUList`
+        All HDUs found in the FITS file.
+    primary : `~astropy.io.fits.PrimaryHDU`
+        The primary HDU.
+    exts : `list`
+        Any additional extensions marked by the standardizer for further
+        processing, `~astropy.io.fits.CompImageHDU` or
+        `~astropy.io.fits.ImageHDU` expected. Does not include the primary
+        HDU if it doesn't contain any image data. Contains at least 1 entry.
+    wcs : `list`
+        WCSs associated with the processable image data. Will contain
+        at least 1 WCS.
+    bbox : `list`
+        Bounding boxes associated with each WCS.
+    """
+
+    # Standardizers that we don't want to register themselves are left
+    # nameless. Since this class isn't usable by itself, we do not
+    # register it.
+    # name = "SingleExtensionFits"
+    # priority = 1
+    configClass = FitsStandardizerConfig
+
+    def __init__(self, location=None, hdulist=None, config=None, **kwargs):
+        super().__init__(location=location, hdulist=hdulist, config=config, **kwargs)
+        self.processable = [self.primary]
+
+    @classmethod
+    def resolveTarget(cls, tgt):
+        parentCanStandardize, res = super().resolveTarget(tgt)
+        if not parentCanStandardize:
+            return False, {}
+
+        canStandardize = parentCanStandardize and len(res["hdulist"]) == 1
+        return canStandardize, res
diff --git a/src/kbmod/standardizers/standardizer.py b/src/kbmod/standardizers/standardizer.py
new file mode 100644
index 00000000..94c83174
--- /dev/null
+++ b/src/kbmod/standardizers/standardizer.py
@@ -0,0 +1,600 @@
+"""`Standardizer` converts data from a given data source into a
+`LayeredImage` object, applying data-source-specific transformations in the
+process.
A +layered image is a collection containing: + * science image + * variance image + * mask image +along with the: + * on-sky coordinates of the pixel in the center of the science image + * observation timestamp + * location of the data source + +When possible, standardizers should attempt to extract a valid WCS and/or +bounding box from the data source. +""" + +import abc +import warnings + + +__all__ = ["Standardizer", "StandardizerConfig", "ConfigurationError"] + + +class ConfigurationError(Exception): + """Error that is raised when configuration parameters contain a logical error.""" + + +class StandardizerConfig: + """Base class for Standardizer configuration. + + Not all standardizers will (can) use the same parameters so refer to their + respective documentation for a more complete list. + + Parameters + ---------- + config : `dict`, `StandardizerConfig` or `None`, optional + Collection of configuration key-value pairs. + kwargs : optional + Keyword arguments, assigned as configuration key-values. + """ + + def __init__(self, config=None, **kwargs): + # This is a bit hacky, but it makes life a lot easier because it + # enables automatic loading of the default configuration and separation + # of default config from instance bound config + keys = list(set(dir(self.__class__)) - set(dir(StandardizerConfig))) + + # First fill out all the defaults by copying cls attrs + self._conf = {k: getattr(self, k) for k in keys} + + # Then override with any user-specified values + if config is not None: + self._conf.update(config) + self._conf.update(kwargs) + + # now just shortcut the most common dict operations + def __getitem__(self, key): + return self._conf[key] + + def __setitem__(self, key, value): + self._conf[key] = value + + def __str__(self): + res = f"{self.__class__.__name__}(" + for k, v in self.items(): + res += f"{k}: {v}, " + return res[:-2] + ")" + + def __len__(self): + return len(self._conf) + + def __contains__(self, key): + return key in self._conf + + def __eq__(self, other): + if isinstance(other, type(self)): + return self._conf == other._conf + elif isinstance(other, dict): + return self._conf == other + else: + return super().__eq__(other) + + def __iter__(self): + return iter(self._conf) + + def __or__(self, other): + if isinstance(other, type(self)): + return self.__class__(config=other._conf | self._conf) + elif isinstance(other, dict): + return self.__class__(config=self._conf | other) + else: + raise TypeError("unsupported operand type(s) for |: {type(self)} " "and {type(other)}") + + def keys(self): + """A set-like object providing a view on config's keys.""" + return self._conf.keys() + + def values(self): + """A set-like object providing a view on config's values.""" + return self._conf.values() + + def items(self): + """A set-like object providing a view on config's items.""" + return self._conf.items() + + def update(self, conf=None, **kwargs): + """Update this config from dict/other config/iterable. + + A dict-like update. If ``conf`` is present and has a ``.keys()`` + method, then does: ``for k in conf: this[k] = conf[k]``. If ``conf`` + is present but lacks a ``.keys()`` method, then does: + ``for k, v in conf: this[k] = v``. 
+
+        In either case, this is followed by:
+        ``for k in kwargs: this[k] = kwargs[k]``
+        """
+        if conf is not None:
+            self._conf.update(conf)
+        self._conf.update(kwargs)
+
+    def toDict(self):
+        """Return this config as a dict."""
+        return self._conf
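+
+
+# A short usage sketch of the config mechanics; ``MyConfig`` and its keys are
+# hypothetical. Defaults declared on a subclass are collected automatically
+# and can be overridden per instance, or via keyword arguments:
+#
+#     class MyConfig(StandardizerConfig):
+#         psf_std = 1.0
+#
+#     conf = MyConfig({"psf_std": 2.0}, grow_mask=False)
+#     conf["psf_std"]   # 2.0
+#     conf.toDict()     # {'psf_std': 2.0, 'grow_mask': False}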
+
+
+class Standardizer(abc.ABC):
+    """Transforms given data into the format required by KBMOD.
+
+    The given data are FITS images, Vera C. Rubin Science Pipeline objects and
+    other data that provides access to the images and observation metadata.
+    Standardizers are required to extract certain information from this data.
+
+    The required metadata are the location of the data, timestamp and the
+    on-sky coordinates of the observation. The required image data is the
+    science exposure, variance and mask.
+
+    The location of the data is a local filesystem path, URI or a URL.
+    Timestamp is the decimal MJD of the start of the exposure.
+    The on-sky coordinates are a pair of (ra, dec) values in decimal degrees,
+    either the pointing information extracted from the header or the central
+    pixel on-sky coordinates calculated via WCS.
+
+    The science exposure and variance are self-explanatory and the mask is a
+    simple bitmask in which pixels with value 1 will not be included in KBMOD
+    search.
+
+    The standardizers are allowed to extract any additional information
+    desired, e.g. the filter, but there are no guarantees that this
+    information is present for all available standardizers.
+
+    Of the optional metadata two properties are of particular interest - the
+    WCS and the bounding box of each science exposure. They underpin the way
+    the exposures of interest are grouped together.
+    KBMOD does not insist on these being provided, requiring instead just
+    more general "pointing" information. For this reason the WCS and BBOX
+    have their own standardize methods, and should be provided whenever
+    possible.
+
+    Data is expected to be unravelled on a per-science exposure level. Data
+    that shares some metadata, such as timestamp or filter for example, for
+    multiple science exposures is expected to unravel that metadata and
+    associate it with each of the science exposures individually.
+
+    The data for which this standardization and unravelling will occur needs
+    to be appended to the ``processable`` attribute of the class.
+
+    This class is an abstract base class that serves as a recipe for
+    implementing a Standardizer specialized for processing a particular
+    dataset. Do not attempt to instantiate this class directly.
+
+    Parameters
+    ----------
+    location : `str`
+        Location of the file to standardize, a filepath or URI.
+
+    Attributes
+    ----------
+    location : `str`
+        Location of the file being standardized.
+    processable : `list`
+        List of the standardizer's internal units that can be processed by
+        KBMOD. For example, for FITS files on a local filesystem these could
+        be the AstroPy header units (`~astropy.io.fits.HDUList` elements), for
+        Vera C. Rubin their internal `Exposure*` objects etc. The number of
+        processable units will match the number of returned standardized
+        images, variances, masks and PSFs.
+    wcs : `list`
+        WCSs associated with the processable image data.
+    bbox : `list`
+        Bounding boxes associated with each processable image data.
+    """
+
+    registry = dict()
+    """All registered upload standardizer classes."""
+
+    name = None
+    """Processor's name. Only named standardizers will be registered."""
+
+    priority = 0
+    """Priority. Standardizers with high priority are preferred over
+    standardizers with low priority when processing an upload.
+    """
+
+    configClass = StandardizerConfig
+    """Standardizer's configuration. A class whose attributes set the behavior
+    of the standardization."""
+
+    @classmethod
+    def get(cls, tgt, force=None, config=None, **kwargs):
+        """Get the standardizer class that can handle the given file.
+
+        When the standardizer is registered, it can be requested by its name.
+        See ``self.registry`` for a list of registered Standardizers.
+
+        When the correct standardizer is not known, the target can be
+        provided. The Standardizer with the highest priority that marks
+        the target as processable will be returned.
+
+        At least one of either the target or the ``force`` parameters has to
+        be given.
+
+        Parameters
+        ----------
+        tgt : any
+            The target to be standardized.
+        force : `str` or `cls`, optional
+            Force the use of the given `Standardizer`. The given name must be a
+            part of the registered `Standardizers` or a callable. When no
+            `Standardizer` is forced, all registered standardizers are tested
+            and the highest priority `Standardizer` is selected.
+        config : `~StandardizerConfig`, `dict` or `None`, optional
+            Standardizer configuration or dictionary containing the config
+            parameters for standardization. When `None` default values for the
+            appropriate `Standardizer` will be used.
+        **kwargs : `dict`, optional
+            Any additional, optional, keyword arguments are passed into the
+            `Standardizer`. See relevant `Standardizer` documentation for
+            details.
+
+        Returns
+        -------
+        standardizer : `object`
+            Standardizer instance forced, or selected as the most appropriate
+            one, to process the given target.
+
+        Raises
+        ------
+        ValueError
+            When neither the target nor the standardizer are given, or when
+            no standardizer that marks the target as processable is found.
+        KeyError
+            When the given standardizer name is not a part of the standardizer
+            registry.
+        """
+        # A particular standardizer is being forced; shortcut directly and
+        # let it raise if kwargs does not have the required resources
+        if force is not None and isinstance(force, type):
+            return force(tgt, config=config, **kwargs)
+        elif force is not None and isinstance(force, str):
+            try:
+                stdcls = cls.registry[force]
+                return stdcls(tgt, config=config, **kwargs)
+            except KeyError as e:
+                raise KeyError(
+                    "Standardizer must be a registered standardizer name or a "
+                    f"class reference. Expected {', '.join([std for std in cls.registry])} "
+                    f"got '{force}' instead. "
+                ) from e
+
+        # The standardizer is unknown; check which standardizers volunteer and
+        # get the highest priority one.
+        volunteers = []
+        for standardizer in cls.registry.values():
+            resolved = standardizer.resolveTarget(tgt)
+            canStandardize, resources = (resolved, {}) if isinstance(resolved, bool) else resolved
+            if canStandardize:
+                volunteers.append((standardizer, resources))
+
+        # if no volunteers are found, raise
+        if not volunteers:
+            raise ValueError(
+                "None of the registered standardizers are able "
+                "to process this source. You can provide your "
+                "own. Refer to Standardizer documentation for "
+                "further details."
+            )
+
+        # if more than 1 volunteer is found, sort on priority and warn
+        if len(volunteers) > 1:
+            get_prio = lambda volunteer: volunteer[0].priority  # noqa: E731
+            volunteers.sort(key=get_prio, reverse=True)
+            warnings.warn(
+                "Multiple standardizers declared ability to "
+                f"standardize; using {volunteers[0][0].name}."
+            )
+
+        # and if there was ever only just the one volunteer, return it
+        standardizer, resources = volunteers[0]
+        return standardizer(tgt, config=config, **resources, **kwargs)
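+
+    # A minimal sketch of the three ways ``get`` is typically invoked; the
+    # target file name is hypothetical:
+    #
+    #     std = Standardizer.get("exposure.fits")                   # auto-resolve
+    #     std = Standardizer.get("exposure.fits", force="KBMODV1")  # by name
+    #     std = Standardizer.get("exposure.fits", force=KBMODV1)    # by class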
+
+    @classmethod
+    @abc.abstractmethod
+    def resolveTarget(cls, tgt):
+        """Returns a pair of values ``(canStandardize, resources)``.
+
+        The first value, ``canStandardize``, indicates that the standardizer
+        is able to standardize the given target. The second value,
+        ``resources``, is an optionally returned value, a dictionary
+        containing any resources constructed or resolved in the process.
+
+        This method is called during automatic resolution of standardizers. In
+        that process, each registered `Standardizer` is asked to resolve the
+        target. The standardizers, and their resources if any, that
+        successfully resolved the target are sorted based on the
+        `Standardizer` priority and used to standardize the target.
+
+        Because each `Standardizer` is asked to resolve each target, of which
+        there are potentially many, it is encouraged that this method is
+        implemented with performance in mind.
+
+        Sometimes, however, it may not be possible to avoid acquiring or
+        constructing a resource that, in the given context, could be
+        considered expensive. Returning the resource(s) allows that cost to be
+        optimized away, by avoiding acquiring or constructing the resource
+        again during initialization.
+
+        As a practical example: a `FitsStandardizer` standardizes FITS files
+        pointed to by a local filesystem path (i.e. the target). To confirm
+        its ability to standardize the target, it has to make sure it
+        can open and read the file. It uses AstroPy to construct an
+        `~astropy.io.fits.HDUList`, i.e. the resource. Returning the already
+        constructed `~astropy.io.fits.HDUList` allows `FitsStandardizer` to
+        skip constructing a new one via `astropy.io.fits.open`.
+
+        Parameters
+        ----------
+        tgt : data-source
+            Some data source, URL, URI, POSIX filepath, Rubin Data Butler Data
+            Repository, a Python object, callable, etc.
+
+        Returns
+        -------
+        canStandardize : `bool`
+            `True` when the processor knows how to handle the uploaded
+            file and `False` otherwise.
+        resources : `dict`, optional
+            Any optional resources constructed during target resolution. An
+            empty dictionary if there are none.
+        """
+        raise NotImplementedError()
+
+    @classmethod
+    def canStandardize(cls, tgt):
+        """Returns ``True`` when the standardizer knows how to process
+        the given upload (file) type.
+
+        Parameters
+        ----------
+        tgt : data-source
+            Some data source, URL, URI, POSIX filepath, Rubin Data Butler Data
+            Repository, a Python object, callable, etc.
+
+        Returns
+        -------
+        canStandardize : `bool`
+            `True` when the processor knows how to handle the uploaded
+            file and `False` otherwise.
+
+        Notes
+        -----
+        Implementation is instrument-specific.
+        """
+        return cls.resolveTarget(tgt)[0]
+
+    def __init_subclass__(cls, **kwargs):
+        # Registers subclasses of this class with a set `name` class
+        # attribute as available standardizers. Note the specific
+        # standardizer has to be imported for this to happen, since the
+        # mechanism is triggered at class definition time.
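+        # For example, a subclass declaring a name registers itself
+        # automatically (a hypothetical standardizer):
+        #
+        #     class MyStd(FitsStandardizer):
+        #         name = "MyStd"
+        #         priority = 2
+        #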
+        name = getattr(cls, "name", None)
+        if name is not None:
+            super().__init_subclass__(**kwargs)
+            Standardizer.registry[cls.name] = cls
+
+    @abc.abstractmethod
+    def __init__(self, location, config=None, **kwargs):
+        self.location = location
+        self.processable = []
+        self.config = self.configClass(config)
+
+    def __str__(self):
+        return f"{self.name}({self.location}, {self.processable})"
+
+    def __repr__(self):
+        return f"{self.__class__.__name__}({self.location})"
+
+    # all these should probably be named in plural - but lord almighty that's
+    # a lot of references to update and it feels so unnatural - help? These
+    # lead to a lot of duplicated code right now, but that code relies on lazy
+    # eval of a list stored in _wcs and that looks more magical than just
+    # forcing users to specify the internal mechanism, despite the code
+    # duplication.
+    def wcs(self):
+        """A list of WCSs or `None` for each entry marked as processable.
+
+        Expected to be a WCS object, but when it is not possible to construct
+        one `None` will be returned in its place.
+        """
+        raise NotImplementedError()
+
+    @abc.abstractproperty
+    def bbox(self):
+        """A list of bounding boxes or `None` for each entry marked as
+        processable.
+
+        ========== ============================================================
+        Key        Description
+        ========== ============================================================
+        center_ra  On-sky (ICRS) right ascension coordinate, expressed in
+                   decimal degrees, of the center of a rectangular bounding
+                   box.
+        center_dec On-sky (ICRS) declination coordinate, expressed in decimal
+                   degrees, of the center of a rectangular bounding box.
+        corner_ra  On-sky (ICRS) right ascension coordinate, expressed in
+                   decimal degrees, of a corner of a rectangular bounding box.
+        corner_dec On-sky (ICRS) declination coordinate, expressed in decimal
+                   degrees, of a corner of a rectangular bounding box.
+        ========== ============================================================
+
+        Often, when the WCS can not be constructed, the BBOX can not be
+        constructed either. Then `None` is expected to be returned in its
+        place.
+        """
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def standardizeWCS(self):
+        """Creates a WCS for every processable science image.
+
+        Returns
+        -------
+        wcs : `list`
+            List of `~astropy.wcs.WCS` objects.
+        """
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def standardizeBBox(self):
+        """Calculate the standardized bounding box, the world coordinates at
+        image corner and image center.
+
+        Returns
+        -------
+        standardizedBBox : `dict`
+            Calculated coordinate values, a dict with ``wcs_center_[ra, dec]``
+            and ``wcs_corner_[ra, dec]`` keys.
+
+        Notes
+        -----
+        Center can be assumed to be at the (dimX/2, dimY/2) pixel coordinates,
+        rounded down. Corner is taken to be the (0,0)-th pixel.
+        """
+        raise NotImplementedError()
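+
+    # A sketch of how a bounding box is typically derived from a WCS, assuming
+    # a 2D image with dimensions (dim_x, dim_y); the exact key names may
+    # differ between standardizers:
+    #
+    #     center = wcs.pixel_to_world(dim_x // 2, dim_y // 2)
+    #     corner = wcs.pixel_to_world(0, 0)
+    #     bbox = {"center_ra": center.ra.deg, "center_dec": center.dec.deg,
+    #             "corner_ra": corner.ra.deg, "corner_dec": corner.dec.deg}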
+
+    @abc.abstractmethod
+    def standardizeMetadata(self):
+        """Standardizes required and optional metadata from the given data.
+
+        The standardized metadata is expected to be a dict containing the
+        following keys and values:
+
+        ======== ==============================================================
+        Key      Description
+        ======== ==============================================================
+        location Path, URL or URI to the data.
+        mjd      Decimal MJD timestamp of the start of the observation.
+        ra       Right Ascension in the ICRS coordinate system, expressed in
+                 decimal degrees, of the extracted pointing, or the calculated
+                 on-sky position of the central pixel.
+        dec      Declination in the ICRS coordinate system, expressed in
+                 decimal degrees, of the extracted, or calculated, on-sky
+                 position of the data.
+        wcs      Result of `~standardizeWCS`, a list of `~astropy.wcs.WCS`
+                 objects or a list of `None`s if they can not be constructed.
+        bbox     Result of `standardizeBBox`, a list of standardized bounding
+                 boxes or a list of `None`s if they can not be calculated.
+        ======== ==============================================================
+
+        Returns
+        -------
+        metadata : `dict`
+            Standardized metadata.
+
+        Notes
+        -----
+        If a target being standardized contains only 1 image that can be
+        standardized and processed by KBMOD, the values in the dictionary can
+        be strings, integers, floats, and other such single-valued
+        non-iterable types, or an iterable type containing only 1 element.
+        When the target being standardized contains multiple images/extensions
+        that are processable by KBMOD, the single-valued dict entries will be
+        considered as shared by all the processable images/extensions/units of
+        the target. The iterable values in the dictionary must match the
+        number and order of the units marked as processable.
+        """
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def standardizeScienceImage(self):
+        """Standardizes the science image data.
+
+        For FITS files, this is usually a trivial no-op operation that returns
+        the `data` attribute of the correct HDU. For different data sources
+        this is a type casting, or a download operation.
+
+        Returns
+        -------
+        image : `list[~np.array]`
+            Science images.
+        """
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def standardizeVarianceImage(self):
+        """Standardizes the variance data.
+
+        For FITS files, this is sometimes a trivial no-op operation returning
+        the correct FITS extension. In other cases, this has to be calculated,
+        if sufficient information is provided in the header, or a different
+        file needs to be downloaded/read.
+
+        .. note::
+            Refer to the manual for the originating dataset, whether the
+            instrument or processing pipeline reference, as some variance
+            planes store the inverse of variance.
+
+        Returns
+        -------
+        variance : `list[~np.array]`
+            Variance images.
+        """
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def standardizeMaskImage(self):
+        """Standardizes the mask data as a simple 0 (not masked) and 1
+        (masked) bitmap.
+
+        For FITS files, this is sometimes a trivial no-op operation returning
+        the correct FITS extension. In other cases, the mask has to be
+        constructed from external data, such as pixel masks and catalogs,
+        or downloaded/read from a different file at the data source.
+
+        Returns
+        -------
+        mask : `list[~np.array]`
+            Mask images.
+        """
+        raise NotImplementedError()
+
+    # no idea really what to do about this one? AFAIK almost no images come
+    # with PSFs in them
+    @abc.abstractmethod
+    def standardizePSF(self):
+        """Returns a PSF for each extension marked as processable.
+
+        Returns
+        -------
+        psfs : `list[~kbmod.search.psf]`
+            List of `~kbmod.search.psf` objects.
+        """
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def toLayeredImage(self):
+        """Run metadata standardization methods. These include header
+        and bounding box standardization.
+
+        Notes
+        -----
+        Implementation is standardizer-specific.
+        """
+        raise NotImplementedError()
+
+    def standardize(self):
+        """Invokes standardize metadata, image, variance, mask and PSF and
+        returns the results as a dictionary.
+ + Returns + ------- + standardizedData : `dict` + Dictionary with standardized ``meta``, ``science``, ``variance``, + ``mask`` and ``psf`` values. + """ + std = {"meta": self.standardizeMetadata()} + std["science"] = self.standardizeScienceImage() + std["variance"] = self.standardizeVarianceImage() + std["mask"] = self.standardizeMaskImage() + std["psf"] = self.standardizePSF() + + return std diff --git a/tests/data/decam_imdiff_headers.ecsv.tar.bz2 b/tests/data/decam_imdiff_headers.ecsv.tar.bz2 new file mode 100644 index 00000000..cdaa2fe9 Binary files /dev/null and b/tests/data/decam_imdiff_headers.ecsv.tar.bz2 differ diff --git a/tests/dump_headers.py b/tests/dump_headers.py new file mode 100644 index 00000000..74fbc60f --- /dev/null +++ b/tests/dump_headers.py @@ -0,0 +1,500 @@ +# Modified from the original Astropy code to add a card format to the +# tabular output. All rights belong to the original authors. + +# Licensed under a 3-clause BSD style license - see LICENSE.rst +""" +``fitsheader`` is a command line script based on astropy.io.fits for printing +the header(s) of one or more FITS file(s) to the standard output in a human- +readable format. + +Example uses of fitsheader: + +1. Print the header of all the HDUs of a .fits file:: + + $ fitsheader filename.fits + +2. Print the header of the third and fifth HDU extension:: + + $ fitsheader --extension 3 --extension 5 filename.fits + +3. Print the header of a named extension, e.g. select the HDU containing + keywords EXTNAME='SCI' and EXTVER='2':: + + $ fitsheader --extension "SCI,2" filename.fits + +4. Print only specific keywords:: + + $ fitsheader --keyword BITPIX --keyword NAXIS filename.fits + +5. Print keywords NAXIS, NAXIS1, NAXIS2, etc using a wildcard:: + + $ fitsheader --keyword NAXIS* filename.fits + +6. Dump the header keywords of all the files in the current directory into a + machine-readable csv file:: + + $ fitsheader --table ascii.csv *.fits > keywords.csv + +7. Specify hierarchical keywords with the dotted or spaced notation:: + + $ fitsheader --keyword ESO.INS.ID filename.fits + $ fitsheader --keyword "ESO INS ID" filename.fits + +8. Compare the headers of different fits files, following ESO's ``fitsort`` + format:: + + $ fitsheader --fitsort --extension 0 --keyword ESO.INS.ID *.fits + +9. Same as above, sorting the output along a specified keyword:: + + $ fitsheader -f -s DATE-OBS -e 0 -k DATE-OBS -k ESO.INS.ID *.fits + +10. Sort first by OBJECT, then DATE-OBS:: + + $ fitsheader -f -s OBJECT -s DATE-OBS *.fits + +Note that compressed images (HDUs of type +:class:`~astropy.io.fits.CompImageHDU`) really have two headers: a real +BINTABLE header to describe the compressed data, and a fake IMAGE header +representing the image that was compressed. Astropy returns the latter by +default. You must supply the ``--compressed`` option if you require the real +header that describes the compression. + +With Astropy installed, please run ``fitsheader --help`` to see the full usage +documentation. +""" + +import argparse +import sys + +import numpy as np + +from astropy import __version__, log +from astropy.io import fits + +DESCRIPTION = """ +Print the header(s) of a FITS file. Optional arguments allow the desired +extension(s), keyword(s), and output format to be specified. +Note that in the case of a compressed image, the decompressed header is +shown by default. + +This script is part of the Astropy package. 
See +https://docs.astropy.org/en/latest/io/fits/usage/scripts.html#module-astropy.io.fits.scripts.fitsheader +for further documentation. +""".strip() + + +class ExtensionNotFoundException(Exception): + """Raised if an HDU extension requested by the user does not exist.""" + + +class HeaderFormatter: + """Class to format the header(s) of a FITS file for display by the + `fitsheader` tool; essentially a wrapper around a `HDUList` object. + + Example usage: + fmt = HeaderFormatter('/path/to/file.fits') + print(fmt.parse(extensions=[0, 3], keywords=['NAXIS', 'BITPIX'])) + + Parameters + ---------- + filename : str + Path to a single FITS file. + verbose : bool + Verbose flag, to show more information about missing extensions, + keywords, etc. + + Raises + ------ + OSError + If `filename` does not exist or cannot be read. + """ + + def __init__(self, filename, verbose=True): + self.filename = filename + self.verbose = verbose + self._hdulist = fits.open(filename) + + def parse(self, extensions=None, keywords=None, compressed=False): + """Returns the FITS file header(s) in a readable format. + + Parameters + ---------- + extensions : list of int or str, optional + Format only specific HDU(s), identified by number or name. + The name can be composed of the "EXTNAME" or "EXTNAME,EXTVER" + keywords. + + keywords : list of str, optional + Keywords for which the value(s) should be returned. + If not specified, then the entire header is returned. + + compressed : bool, optional + If True, shows the header describing the compression, rather than + the header obtained after decompression. (Affects FITS files + containing `CompImageHDU` extensions only.) + + Returns + ------- + formatted_header : str or astropy.table.Table + Traditional 80-char wide format in the case of `HeaderFormatter`; + an Astropy Table object in the case of `TableHeaderFormatter`. + """ + # `hdukeys` will hold the keys of the HDUList items to display + if extensions is None: + hdukeys = range(len(self._hdulist)) # Display all by default + else: + hdukeys = [] + for ext in extensions: + try: + # HDU may be specified by number + hdukeys.append(int(ext)) + except ValueError: + # The user can specify "EXTNAME" or "EXTNAME,EXTVER" + parts = ext.split(",") + if len(parts) > 1: + extname = ",".join(parts[0:-1]) + extver = int(parts[-1]) + hdukeys.append((extname, extver)) + else: + hdukeys.append(ext) + + # Having established which HDUs the user wants, we now format these: + return self._parse_internal(hdukeys, keywords, compressed) + + def _parse_internal(self, hdukeys, keywords, compressed): + """The meat of the formatting; in a separate method to allow overriding.""" + result = [] + for idx, hdu in enumerate(hdukeys): + try: + cards = self._get_cards(hdu, keywords, compressed) + except ExtensionNotFoundException: + continue + + if idx > 0: # Separate HDUs by a blank line + result.append("\n") + result.append(f"# HDU {hdu} in {self.filename}:\n") + for c in cards: + result.append(f"{c}\n") + return "".join(result) + + def _get_cards(self, hdukey, keywords, compressed): + """Returns a list of `astropy.io.fits.card.Card` objects. + + This function will return the desired header cards, taking into + account the user's preference to see the compressed or uncompressed + version. + + Parameters + ---------- + hdukey : int or str + Key of a single HDU in the HDUList. + + keywords : list of str, optional + Keywords for which the cards should be returned. + + compressed : bool, optional + If True, shows the header describing the compression. 
+ + Raises + ------ + ExtensionNotFoundException + If the hdukey does not correspond to an extension. + """ + # First we obtain the desired header + try: + if compressed: + # In the case of a compressed image, return the header before + # decompression (not the default behavior) + header = self._hdulist[hdukey]._header + else: + header = self._hdulist[hdukey].header + except (IndexError, KeyError): + message = f"{self.filename}: Extension {hdukey} not found." + if self.verbose: + log.warning(message) + raise ExtensionNotFoundException(message) + + if not keywords: # return all cards + cards = header.cards + else: # specific keywords are requested + cards = [] + for kw in keywords: + try: + crd = header.cards[kw] + if isinstance(crd, fits.card.Card): # Single card + cards.append(crd) + else: # Allow for wildcard access + cards.extend(crd) + except KeyError: # Keyword does not exist + if self.verbose: + log.warning(f"{self.filename} (HDU {hdukey}): Keyword {kw} not found.") + return cards + + def close(self): + self._hdulist.close() + + +class TableHeaderFormatter(HeaderFormatter): + """Class to convert the header(s) of a FITS file into a Table object. + The table returned by the `parse` method will contain four columns: + filename, hdu, keyword, and value. + + Subclassed from HeaderFormatter, which contains the meat of the formatting. + """ + + def _parse_internal(self, hdukeys, keywords, compressed): + """Method called by the parse method in the parent class.""" + tablerows = [] + for hdu in hdukeys: + try: + for card in self._get_cards(hdu, keywords, compressed): + tablerows.append( + { + "filename": self.filename, + "hdu": hdu, + "keyword": card.keyword, + "value": str(card.value), + "format": type(card.value).__name__, + } + ) + except ExtensionNotFoundException: + pass + + if tablerows: + from astropy import table + + return table.Table(tablerows) + return None + + +def print_headers_traditional(args): + """Prints FITS header(s) using the traditional 80-char format. + + Parameters + ---------- + args : argparse.Namespace + Arguments passed from the command-line as defined below. + """ + for idx, filename in enumerate(args.filename): # support wildcards + if idx > 0 and not args.keyword: + print() # print a newline between different files + + formatter = None + try: + formatter = HeaderFormatter(filename) + print(formatter.parse(args.extensions, args.keyword, args.compressed), end="") + except OSError as e: + log.error(str(e)) + finally: + if formatter: + formatter.close() + + +def print_headers_as_table(args): + """Prints FITS header(s) in a machine-readable table format. + + Parameters + ---------- + args : argparse.Namespace + Arguments passed from the command-line as defined below. 
+ """ + tables = [] + # Create a Table object for each file + for filename in args.filename: # Support wildcards + formatter = None + try: + formatter = TableHeaderFormatter(filename) + tbl = formatter.parse(args.extensions, args.keyword, args.compressed) + if tbl: + tables.append(tbl) + except OSError as e: + log.error(str(e)) # file not found or unreadable + finally: + if formatter: + formatter.close() + + # Concatenate the tables + if len(tables) == 0: + return False + elif len(tables) == 1: + resulting_table = tables[0] + else: + from astropy import table + + resulting_table = table.vstack(tables) + # Print the string representation of the concatenated table + resulting_table.write(sys.stdout, format=args.table) + + +def print_headers_as_comparison(args): + """Prints FITS header(s) with keywords as columns. + + This follows the dfits+fitsort format. + + Parameters + ---------- + args : argparse.Namespace + Arguments passed from the command-line as defined below. + """ + from astropy import table + + tables = [] + # Create a Table object for each file + for filename in args.filename: # Support wildcards + formatter = None + try: + formatter = TableHeaderFormatter(filename, verbose=False) + tbl = formatter.parse(args.extensions, args.keyword, args.compressed) + if tbl: + # Remove empty keywords + tbl = tbl[np.where(tbl["keyword"] != "")] + else: + tbl = table.Table([[filename]], names=("filename",)) + tables.append(tbl) + except OSError as e: + log.error(str(e)) # file not found or unreadable + finally: + if formatter: + formatter.close() + + # Concatenate the tables + if len(tables) == 0: + return False + elif len(tables) == 1: + resulting_table = tables[0] + else: + resulting_table = table.vstack(tables) + + # If we obtained more than one hdu, merge hdu and keywords columns + hdus = resulting_table["hdu"] + if np.ma.isMaskedArray(hdus): + hdus = hdus.compressed() + if len(np.unique(hdus)) > 1: + for tab in tables: + new_column = table.Column([f"{row['hdu']}:{row['keyword']}" for row in tab]) + tab.add_column(new_column, name="hdu+keyword") + keyword_column_name = "hdu+keyword" + else: + keyword_column_name = "keyword" + + # Check how many hdus we are processing + final_tables = [] + for tab in tables: + final_table = [table.Column([tab["filename"][0]], name="filename")] + if "value" in tab.colnames: + for row in tab: + if row["keyword"] in ("COMMENT", "HISTORY"): + continue + final_table.append(table.Column([row["value"]], name=row[keyword_column_name])) + final_tables.append(table.Table(final_table)) + final_table = table.vstack(final_tables) + # Sort if requested + if args.sort: + final_table.sort(args.sort) + # Reorganise to keyword by columns + final_table.pprint(max_lines=-1, max_width=-1) + + +if __name__ == "__main__": + """This is the main function called by the `fitsheader` script.""" + parser = argparse.ArgumentParser( + description=DESCRIPTION, formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}") + + parser.add_argument( + "-e", + "--extension", + metavar="HDU", + action="append", + dest="extensions", + help=( + "specify the extension by name or number; this argument can " + "be repeated to select multiple extensions" + ), + ) + parser.add_argument( + "-k", + "--keyword", + metavar="KEYWORD", + action="append", + type=str, + help=( + "specify a keyword; this argument can be repeated to select " + "multiple keywords; also supports wildcards" + ), + ) + mode_group = 
parser.add_mutually_exclusive_group() + mode_group.add_argument( + "-t", + "--table", + nargs="?", + default=False, + metavar="FORMAT", + help=( + "print the header(s) in machine-readable table format; the " + 'default format is "ascii.fixed_width" (can be "ascii.csv", ' + '"ascii.html", "ascii.latex", "fits", etc)' + ), + ) + mode_group.add_argument( + "-f", + "--fitsort", + action="store_true", + help=("print the headers as a table with each unique " "keyword in a given column (fitsort format) "), + ) + parser.add_argument( + "-s", + "--sort", + metavar="SORT_KEYWORD", + action="append", + type=str, + help=( + "sort output by the specified header keywords, can be repeated to " + "sort by multiple keywords; Only supported with -f/--fitsort" + ), + ) + parser.add_argument( + "-c", + "--compressed", + action="store_true", + help=( + "for compressed image data, show the true header which describes " + "the compression rather than the data" + ), + ) + parser.add_argument( + "filename", + nargs="+", + help="path to one or more files; wildcards are supported", + ) + args = parser.parse_args() + # If `--table` was used but no format specified, + # then use ascii.fixed_width by default + if args.table is None: + args.table = "ascii.fixed_width" + + if args.sort: + args.sort = [key.replace(".", " ") for key in args.sort] + if not args.fitsort: + log.error("Sorting with -s/--sort is only supported in conjunction with" " -f/--fitsort") + # 2: Unix error convention for command line syntax + sys.exit(2) + + if args.keyword: + args.keyword = [key.replace(".", " ") for key in args.keyword] + + # Now print the desired headers + try: + if args.table: + print_headers_as_table(args) + elif args.fitsort: + print_headers_as_comparison(args) + else: + print_headers_traditional(args) + except OSError: + # A 'Broken pipe' OSError may occur when stdout is closed prematurely, + # eg. when calling `fitsheader file.fits | head`. We let this pass. + pass diff --git a/tests/test_butlerstd.py b/tests/test_butlerstd.py new file mode 100644 index 00000000..4336d9f6 --- /dev/null +++ b/tests/test_butlerstd.py @@ -0,0 +1,338 @@ +import os +import uuid +import tempfile +import unittest +from unittest import mock + +from astropy.time import Time +from astropy.wcs import WCS +import numpy as np + +from utils import DECamImdiffFactory +from kbmod import PSF, Standardizer, StandardizerConfig +from kbmod.standardizers import ButlerStandardizer, ButlerStandardizerConfig, KBMODV1Config + + +# Use a shared factory so that we can reference the same fits files in mocks +# and tests without having to untar the archive multiple times. +FitsFactory = DECamImdiffFactory() + + +# Patch Rubin Middleware out of existence +class Registry: + def getDataset(self, ref): + return ref + + +class Datastore: + def __init__(self, root): + self.root = root + + +class DatasetRef: + def __init__(self, ref): + self.ref = ref + self.run = ref + + +class DatasetId: + def __init__(self, ref): + self.id = ref + self.ref = ref + self.run = ref + + +class MockButler: + """Mocked Vera C. Rubin Data Butler functionality sufficient to be used in + a ButlerStandardizer. + + The mocked .get method will return an mocked Exposure object with all the, + generally, expected attributes (info, visitInfo, image, variance, mask, + wcs). Most of these attributes are mocked such that they return an integer + id, which is then used in a FitsFactory to read out the serialized header + of some underlying real data. 
Particularly, we target DECam, such that + outputs of ButlerStandardizer and KBMODV1 are comparable. + + By default the mocked image arrays will contain the empty + `Butler.empty_arrat` but providing a callable `mock_images_f`, that takes + in a single mocked Exposure object, and assigns the: + * mocked.image.array + * mocked.variance.array + * mocked.mask.array + attributes can be used to customize the returned arrays. + """ + + def __init__(self, root, ref=None, mock_images_f=None): + self.datastore = Datastore(root) + self.registry = Registry() + self.current_ref = ref + self.mockImages = mock_images_f + + def getURI(self, ref, collections=None): + mocked = mock.Mock(name="ButlerURI") + mocked.geturl.return_value = f"file:/{self.datastore.root}" + return mocked + + def getDataset(self, datid): + return self.get(datid) + + def get(self, ref, collections=None): + # Butler.get gets a DatasetRef, but can take an DatasetRef or DatasetId + # DatasetId is type alias for UUID's, which are hex-strings when + # serialized. We short it to an integer, because We use an integer to + # read a particular file in FitsFactory. This means we got to cast + # all these different objects to int (somehow). Firstly, it's one of + # our mocks, dig out the value we really care about: + if isinstance(ref, (DatasetId, DatasetRef)): + ref = ref.ref + + # that value can be an int, a simple str(int) (used in testing only), + # a large hex UUID string, or a UUID object. Duck-type them to int + if isinstance(ref, uuid.UUID): + ref = ref.int + elif isinstance(ref, str): + try: + ref = uuid.UUID(ref).int + except (ValueError, AttributeError): + # likely a str(int) + pass + + # Cast to int to cover for all eventualities + ref = int(ref) + self.current_ref = ref + + # Finally we can proceed with mocking. Butler.get (the way we use it at + # least) returns an Exposure[F/I/...] object. Exposure is like our + # LayeredImage. We need to mock every attr, method and property that we + # call the standardizer. We shortcut the results to match the KBMODV1. + hdul = FitsFactory.get_fits(ref % FitsFactory.n_files, spoof_data=True) + prim = hdul["PRIMARY"].header + + mocked = mock.Mock( + name="Exposure", + spec_set=[ + "visitInfo", + "info", + "hasWcs", + "getWidth", + "getHeight", + "getFilter", + "image", + "variance", + "mask", + "wcs", + ], + ) + + # General metadata mocks + mocked.visitInfo.date.toAstropy.return_value = Time(hdul["PRIMARY"].header["DATE-AVG"], format="isot") + mocked.info.id = prim["EXPID"] + mocked.getWidth.return_value = hdul[1].header["NAXIS1"] + mocked.getHeight.return_value = hdul[1].header["NAXIS2"] + mocked.info.getFilter().physicalLabel = prim["FILTER"] + + # Rubin Sci. Pipes. return their own internal SkyWcs object. We mock a + # Header that'll work with ButlerStd instead. It works because in the + # STD we cast SkyWcs to dict-like thing, from which we make a WCS. What + # happens if SkyWcs changes though? + wcshdr = WCS(hdul[1].header).to_header(relax=True) + wcshdr["NAXIS1"] = hdul[1].header["NAXIS1"] + wcshdr["NAXIS2"] = hdul[1].header["NAXIS2"] + mocked.hasWcs.return_value = True + mocked.wcs.getFitsMetadata.return_value = wcshdr + + # Mocking the images consists of using the Factory default, then + # invoking any user specified method on the mocked exposure obj. 
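+        # For example, a caller could pass a hypothetical ``mock_images_f``
+        # such as (the masking tests below do exactly this):
+        #
+        #     def noisy(mocked):
+        #         mocked.image.array = np.random.normal(0, 1, (100, 100))
+        #
+        #     butler = MockButler("/far/far/away", mock_images_f=noisy)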
+ mocked.image.array = hdul["IMAGE"].data + mocked.variance.array = hdul["VARIANCE"].data + mocked.mask.array = hdul["MASK"].data + if self.mockImages is not None: + self.mockImages(mocked) + + # Same issue as with WCS, what if there's a change in definition of the + # mask plane? Note the change in definition of a flag to exponent only. + bit_flag_map = {} + for key, val in KBMODV1Config.bit_flag_map.items(): + bit_flag_map[key] = int(np.log2(val)) + mocked.mask.getMaskPlaneDict.return_value = bit_flag_map + + return mocked + + +class dafButler: + """Intercepts calls ``import lsst.daf.butler as dafButler`` and shortcuts + them to our mocks. + """ + + DatasetRef = DatasetRef + DatasetId = DatasetId + Butler = MockButler + + +@mock.patch.dict( + "sys.modules", + { + "lsst.daf.butler": dafButler, + "lsst.daf.butler.core.DatasetRef": DatasetRef, + "lsst.daf.butler.core.DatasetId": DatasetId, + }, +) +class TestButlerStandardizer(unittest.TestCase): + """Test ButlerStandardizer.""" + + def setUp(self): + self.butler = MockButler("/far/far/away") + + def test_init(self): + """Test ButlerStandardizer can be built from DatasetRef, DatasetId and + the dataset id.""" + # Just makes sure no errors are raised, whether it actually does what + # we want is tested later. + _ = ButlerStandardizer(uuid.uuid1(), butler=self.butler) + _ = ButlerStandardizer(uuid.uuid1().hex, butler=self.butler) + _ = ButlerStandardizer(DatasetRef(2), butler=self.butler) + _ = ButlerStandardizer(DatasetId(3), butler=self.butler) + + _ = Standardizer.get(DatasetRef(5), butler=self.butler) + _ = Standardizer.get(DatasetId(6), butler=self.butler) + + _ = Standardizer.get(DatasetId(6), butler=self.butler, force=ButlerStandardizer) + + def test_standardize(self): + """Test ButlerStandardizer instantiates and standardizes as expected.""" + std = Standardizer.get(DatasetId(7), butler=self.butler) + standardized = std.standardize() + + fits = FitsFactory.get_fits(7, spoof_data=True) + hdr = fits["PRIMARY"].header + expected = { + "mjd": Time(hdr["DATE-AVG"], format="isot").mjd, + "filter": hdr["FILTER"], + "id": "7", + "exp_id": hdr["EXPID"], + "location": "file://far/far/away", + } + + for k, v in expected.items(): + with self.subTest("Value not standardized as expected.", key=k): + self.assertEqual(v, standardized["meta"][k]) + + # The CRVAL1/2 are with respect to the origin (CRPIX), Our center_ra + # definition uses the pixel in the center of the CCD. 
The permissible + # deviation should be on the scale of half a CCD's footprint, unless + # it's DECam then it could be as big as half an FOV of the focal plane + self.assertAlmostEqual(standardized["meta"]["ra"][0], fits[1].header["CRVAL1"], 1) + self.assertAlmostEqual(standardized["meta"]["dec"][0], fits[1].header["CRVAL2"], 1) + + # compare standardized images + # fmt: off + np.testing.assert_equal([fits["IMAGE"].data,], standardized["science"]) + np.testing.assert_equal([fits["VARIANCE"].data,], standardized["variance"]) + np.testing.assert_equal([fits["MASK"].data,], standardized["mask"]) + # fmt: on + + # these are not easily comparable so just assert they exist + self.assertTrue(standardized["meta"]["wcs"]) + self.assertTrue(standardized["meta"]["bbox"]) + + def test_roundtrip(self): + """Test ButlerStandardizer can instantiate itself from standardized + data and a Data Butler.""" + std = Standardizer.get(DatasetId(8), butler=self.butler) + standardized = std.standardize() + + std2 = ButlerStandardizer(**standardized["meta"], butler=self.butler) + self.assertIsInstance(std2, ButlerStandardizer) + + standardized2 = std2.standardize() + # TODO: I got to come up with some reasonable way of comparing this + for k in ["location", "bbox", "mjd", "filter", "id", "exp_id", "ra", "dec"]: + self.assertEqual(standardized["meta"][k], standardized2["meta"][k]) + + def mock_kbmodv1like_bitmasking(self, mockedexp): + """Assign each flag that exists to a pixel, standardize, then expect + the mask to only contain those pixels that are also in mask_flags. + The grow_kernel is so large by default it would mask the nearly the + whole image, so we turn it off. + + Because Rubin keeps flag map in the FITS file headers the + ButlerStdConfig does not contain them. We mock these to match the + DECam KBMODV1-like flags in MockButler, so we can set pixels to those + flag values here. + """ + mask_arr = mockedexp.mask.array + for i, flag in enumerate(KBMODV1Config.bit_flag_map): + mask_arr.ravel()[i] = KBMODV1Config.bit_flag_map[flag] + + # These tests are the same as KBMODV1 because the two hadn't diverged yet + def test_bitmasking(self): + """Test masking with direct config works as expected.""" + butler = MockButler("/far/far/away", mock_images_f=self.mock_kbmodv1like_bitmasking) + + conf = StandardizerConfig(grow_mask=False) + std = Standardizer.get(DatasetId(9), butler=butler, config=conf) + standardizedMask = std.standardizeMaskImage() + + for mask in standardizedMask: + for i, flag in enumerate(KBMODV1Config.bit_flag_map): + with self.subTest("Failed to mask expected", flag=flag): + if flag in ButlerStandardizerConfig.mask_flags: + self.assertEqual(mask.ravel()[i], True) + else: + self.assertEqual(mask.ravel()[i], False) + + def mock_kbmodv1like_thresholding(self, mockedexp): + """Set image pixel [1, 1] to 1 and [2, 2] to 3.""" + mockedexp.image.array[1, 1] = 1 + mockedexp.image.array[2, 2] = 3 + + def test_threshold_masking(self): + """Test brightness threshold masking. 
Test config overrides.""" + butler = MockButler("/far/far/away", mock_images_f=self.mock_kbmodv1like_thresholding) + + conf = StandardizerConfig( + { + "grow_mask": False, + "do_threshold": True, + "brightness_threshold": 2, + } + ) + std = Standardizer.get(DatasetId(10), butler=butler, config=conf) + mask = std.standardizeMaskImage()[0] + + self.assertFalse(mask[1, 1]) + self.assertTrue(mask[2, 2]) + + def mock_kbmodv1like_growmask(self, mockedexp): + """Flag image pixel [2, 2] as BAD, and expect grow_mask to grow that + mask to all neighboring pixels. Again, because flags are not available + through the butler, but exposures only, we mocked them to be the same + like DECam KBMODV1 flags. + """ + mockedexp.mask.array[2, 2] = KBMODV1Config.bit_flag_map["BAD"] + + def test_grow_mask(self): + """Test mask grows as expected.""" + butler = MockButler("/far/far/away", mock_images_f=self.mock_kbmodv1like_growmask) + + conf = StandardizerConfig({"grow_mask": True, "grow_kernel_shape": (3, 3)}) + std = Standardizer.get(DatasetId(11), butler=butler, config=conf) + mask = std.standardizeMaskImage()[0] + + self.assertTrue(mask[1:3, 1:3].all()) + self.assertFalse(mask[:, 0].all()) + self.assertFalse(mask[0, :].all()) + self.assertFalse(mask[-1, :].all()) + self.assertFalse(mask[:, -1].all()) + + def test_psf(self): + """Test PSFs are created as expected. Test instance config overrides.""" + std = Standardizer.get(DatasetId(11), butler=self.butler) + + psf = std.standardizePSF()[0] + self.assertIsInstance(psf, PSF) + self.assertEqual(psf.get_std(), std.config["psf_std"]) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_imagecollection.py b/tests/test_imagecollection.py new file mode 100644 index 00000000..0f97e0f3 --- /dev/null +++ b/tests/test_imagecollection.py @@ -0,0 +1,126 @@ +import os +import shutil +import tempfile +import unittest + +import numpy as np +import astropy.table as atbl + +from kbmod import ImageCollection, Standardizer +from utils import DECamImdiffFactory + + +class TestImageCollection(unittest.TestCase): + """Test ImageCollection class.""" + + fitsFactory = DECamImdiffFactory() + + def setUp(self): + self.fits = self.fitsFactory.get_n(3) + + def test_basics(self): + """Test image collection indexing, equality and special attributes.""" + # Test basics work + ic = ImageCollection.fromTargets(self.fits) + ic2 = ImageCollection.fromTargets(self.fits) + ic3 = ImageCollection.fromTargets(self.fitsFactory.get_n(5)) + + self.assertEqual(ic.meta["n_stds"], 3) + self.assertEqual(ic, ic2) + self.assertNotEqual(ic, ic3) + + # Test wcs, bbox and lazy loaded standardizers + std, ext = ic.get_standardizer(0).values() + self.assertIsInstance(std, Standardizer) + self.assertEqual(ext, 0) + + stds = ic.get_standardizers([0, 1]) + for entry in stds: + std, ext = entry.values() + self.assertIsInstance(std, Standardizer) + self.assertEqual(len(stds), 2) + + # Make sure the arrays are not empty. 
In case there are no wcs or + # bboxes - we're still expecting arrays of None + self.assertEqual(len(ic.wcs), 3) + self.assertEqual(len(ic.bbox), 3) + + # Test indexing works as expected + # * int -> row + # * lists or slice -> new ImageCollection + # * string -> column + row = ic[0] + self.assertIsInstance(row, atbl.Row) + + self.assertIsInstance(ic[[0, 1, 2]], ImageCollection) + self.assertTrue((ic[[0, 1, 2]] == ic).all()) + + self.assertIsInstance(ic["location"], atbl.Column) + self.assertEqual(len(ic["location"]), 3) + self.assertIsInstance(ic["mjd", "location"], atbl.Table) + + # This is kind of a thing of the standardizers themselves, but to + # ensure the standardization results are becoming columns we test for + # content, knowing KBMODV1 is the standardizer in question. + # Test internal book-keeping columns are not returned + expected_cols = [ + "mjd", + "filter", + "visit_id", + "observat", + "obs_lat", + "obs_lon", + "obs_elev", + "location", + "ra", + "dec", + ] + self.assertEqual(list(ic.columns.keys()), expected_cols) + self.assertEqual(list(row.keys()), expected_cols) + + def test_write_read_unreachable(self): + """Test ImageCollection can write itself to disk, and read the written + table without raising errors when original data is unreachable. + """ + ic = ImageCollection.fromTargets(self.fitsFactory.get_n(3)) + + with tempfile.TemporaryDirectory() as tmpdir: + fname = os.path.join(tmpdir, "test.ecsv") + ic.write(fname) + ic2 = ImageCollection.read(fname) + + self.assertEqual(ic, ic2) + with self.assertRaisesRegex(FileNotFoundError, "location is not a file, but no hdulist"): + ic2.get_standardizer(0) + + def test_write_read_reachable(self): + """Test ImageCollection can write itself to disk, and fully reconstruct + itself from disk again. 
+ """ + hduls = self.fitsFactory.get_n(3, spoof_data=True) + ic = ImageCollection.fromTargets(hduls) + + tmpdir = tempfile.mkdtemp() + for i, hdul in enumerate(hduls): + fname = os.path.join(tmpdir, f"{i:0>3}.fits") + hdul.writeto(fname) + hdul.close() + ic2 = ImageCollection.fromDir(tmpdir) + + # We can't compare location, ra, dec spoofing data changes the + # image data which updates header metadata related to these values + subset = ("mjd", "filter", "obs_lon") + self.assertTrue((ic[subset] == ic2[subset]).all()) + + fname = os.path.join(tmpdir, "reachable.ecsv") + ic2.write(fname) + ic3 = ImageCollection.read(fname) + + self.assertEqual(ic2, ic3) + + # cleanup resources + shutil.rmtree(tmpdir) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_standardizer.py b/tests/test_standardizer.py new file mode 100644 index 00000000..dcff972e --- /dev/null +++ b/tests/test_standardizer.py @@ -0,0 +1,349 @@ +import unittest +import tempfile +import warnings +import os + +from astropy.io import fits as fitsio +from astropy.time import Time +import numpy as np + +from utils import DECamImdiffFactory +from kbmod import PSF, Standardizer, StandardizerConfig +from kbmod.standardizers import ( + KBMODV1, + KBMODV1Config, + FitsStandardizer, + SingleExtensionFits, + MultiExtensionFits, +) + + +# Use a shared factory to skip having to untar the archive +FitsFactory = DECamImdiffFactory() + + +class MyStd(KBMODV1): + """Custom standardizer for testing Standardizer registration""" + + name = "MyStd" + priority = 3 + testing_kwargs = False + + @classmethod + def yesStandardize(cls, tgt): + _, resources = super().resolveTarget(tgt) + return True, resources + + @classmethod + def noStandardize(cls, tgt): + return False, {} + + @classmethod + def resolveTarget(cls, tgt): + return cls.noStandardize(tgt) + + def __init__(self, *args, required_flag, optional_flag=False, **kwargs): + super().__init__(*args, **kwargs) + if not self.testing_kwargs: + self.required_flag = False + else: + self.required_flag = required_flag + self.optional_flag = optional_flag + + def translateHeader(self): + # invoke the parent functionality to standardize the default values + metadata = super().translateHeader() + metadata["required_flag"] = self.required_flag + if self.optional_flag: + metadata["optional_flag"] = self.optional_flag + return metadata + + +class TestStandardizer(unittest.TestCase): + """Test Standardizer class.""" + + def setUp(self): + self.fits = FitsFactory.mock_fits() + # Ignore user warning about multiple standardizers, + # One of them will be the MyStd + warnings.filterwarnings(action="ignore", category=UserWarning, message="Multiple standardizers") + + def tearDown(self): + # restore defaults + MyStd.resolveTarget = MyStd.noStandardize + MyStd.priority = 3 + warnings.resetwarnings() + + def test_kwargs_to_init(self): + """Test kwargs are correctly passed from top-level Standardizer to the + underlying standardizer implementation.""" + MyStd.resolveTarget = MyStd.yesStandardize + MyStd.testing_kwargs = True + + with self.assertRaises(TypeError): + std = Standardizer.get(self.fits) + + with self.assertWarnsRegex(UserWarning, "Multiple standardizers"): + std = Standardizer.get(self.fits, required_flag=False) + stdmeta = std.standardizeMetadata() + self.assertFalse(stdmeta["required_flag"]) + + std = Standardizer.get(self.fits, required_flag=True, optional_flag=True) + stdmeta = std.standardizeMetadata() + self.assertTrue(stdmeta["required_flag"]) + self.assertIn("optional_flag", stdmeta) + 
self.assertTrue(stdmeta["optional_flag"]) + + def test_instantiation(self): + """Test priority, forcing and automatic selection works.""" + std = Standardizer.get(self.fits, required_flag=True) + self.assertIsInstance(std, KBMODV1) + + MyStd.resolveTarget = MyStd.yesStandardize + std = Standardizer.get(self.fits, required_flag=True) + self.assertIsInstance(std, MyStd) + + MyStd.priority = 0 + std = Standardizer.get(self.fits, required_flag=True) + self.assertIsInstance(std, KBMODV1) + + # Test forcing ignores everything + MyStd.resolveTarget = MyStd.noStandardize + MyStd.priority = 0 + std = Standardizer.get(self.fits, force=MyStd, required_flag=True) + self.assertIsInstance(std, MyStd) + + # Test instantiating from a single HDUList + std = Standardizer.get(self.fits) + self.assertIsInstance(std, Standardizer) + + # Test force direct and named + std2 = Standardizer.get(self.fits, force=KBMODV1) + self.assertIsInstance(std, KBMODV1) + self.assertEqual(std2.location, std.location) + self.assertEqual(std2.hdulist, std.hdulist) + + std2 = Standardizer.get(self.fits, force="KBMODV1") + self.assertIsInstance(std, KBMODV1) + self.assertEqual(std2.location, std.location) + self.assertEqual(std2.hdulist, std.hdulist) + + # Test from path + hdul = FitsFactory.mock_fits(spoof_data=True) + tmpf = tempfile.NamedTemporaryFile(suffix=".fits", delete=False) + hdul.writeto(tmpf.file, overwrite=True) + hdul.close() + tmpf.close() + + std2 = Standardizer.get(tmpf.name) + self.assertIsInstance(std, KBMODV1) + self.assertEqual(std2.location, tmpf.name) + self.assertEqual(len(std2.hdulist), 16) + + # clean up resources + os.unlink(tmpf.name) + + +# This is in test_standardizeer because totest Standardizer because it's easier +# than making multiple new standardizers for sake of technical clarity or style +# Test KBMODV1 more extensively than other standardizers to cover for the +# possible code-paths through Standardizer itself. 
TODO: eventually update this +class TestKBMODV1(unittest.TestCase): + """Test KBMODV1 Standardizer and Standardizer.""" + + def setUp(self): + self.fits = FitsFactory.mock_fits(spoof_data=True) + + def tearDown(self): + self.fits.close(output_verify="ignore") + + def test_init_direct(self): + # Test default values are as expected and that parent classes did their + # share of work + std = KBMODV1(hdulist=self.fits) + self.assertEqual(std.location, ":memory:") + self.assertEqual(std.hdulist, self.fits) + self.assertEqual(std.config, KBMODV1Config()) + self.assertEqual(std.primary, self.fits["PRIMARY"].header) + self.assertEqual( + std.processable, + [ + self.fits["IMAGE"], + ], + ) + self.assertTrue(std.isMultiExt) + self.assertTrue(KBMODV1.canStandardize(self.fits)) + + # Test init from filepath + # Test from path + hdul = FitsFactory.mock_fits(spoof_data=True) + fits_file = tempfile.NamedTemporaryFile(suffix=".fits", delete=False) + hdul.writeto(fits_file.file, overwrite=True) + hdul.close() + fits_file.close() + + # Test init from HDUList with a known location + hdul = fitsio.open(fits_file.name) + std = KBMODV1(hdulist=hdul) + self.assertEqual(std.location, fits_file.name) + + # Test init with both + std2 = KBMODV1(location=fits_file.name, hdulist=hdul) + self.assertEqual(std.location, fits_file.name) + self.assertEqual(std.hdulist, std2.hdulist) + + # Test raises when neither + with self.assertRaisesRegex(ValueError, "Expected location or HDUList"): + KBMODV1() + + # Test raises correctly when location makes no sense + with self.assertRaisesRegex(FileNotFoundError, "location is not a file"): + KBMODV1("noexist", hdulist=hdul) + KBMODV1("noexist") + + # clean up resources + os.unlink(fits_file.name) + + def test_standardization(self): + """Test KBMODV1 standardize executes and standardizes metadata.""" + std = Standardizer.get(self.fits, force=KBMODV1) + standardized = std.standardize() + + for key in ["meta", "science", "mask", "variance", "psf"]: + self.assertIn(key, standardized.keys()) + + hdr = self.fits["PRIMARY"].header + expected = { + "mjd": Time(hdr["DATE-AVG"], format="isot").mjd, + "filter": hdr["FILTER"], + "visit_id": hdr["IDNUM"], + "observat": hdr["OBSERVAT"], + "obs_lat": hdr["OBS-LAT"], + "obs_lon": hdr["OBS-LONG"], + "obs_elev": hdr["OBS-ELEV"], + "location": ":memory:", + } + + # There used to be an assertDictContainsSubset, but got deprecated? + for k, v in expected.items(): + with self.subTest("Value not standardized as expected.", key=k): + self.assertEqual(v, standardized["meta"][k]) + + # consequence of making std methods generators is that they need to be + # evaluated, see kbmov1.py, perhaps we should give up on this? 
+        empty_array = np.zeros((5, 5), np.float32)
+        np.testing.assert_equal(empty_array, next(standardized["science"]))
+        np.testing.assert_equal(empty_array, next(standardized["variance"]))
+        np.testing.assert_equal(empty_array.astype(np.int32), next(standardized["mask"]))
+
+        # these are not easily comparable because they are FITS-file
+        # dependent, so just assert they exist
+        self.assertTrue(standardized["meta"]["wcs"])
+        self.assertTrue(standardized["meta"]["bbox"])
+
+    def test_roundtrip(self):
+        """Test KBMODV1 can instantiate itself from standardized data."""
+        std = Standardizer.get(self.fits, force=KBMODV1)
+        standardized = std.standardize()
+
+        # Test it raises correctly when the file is not on disk
+        with self.assertRaisesRegex(FileNotFoundError, "location is not a file, but no hdulist"):
+            KBMODV1(**standardized["meta"])
+
+        # Test it works correctly when the FITS is reachable.
+        fits_file = tempfile.NamedTemporaryFile(suffix=".fits", delete=False)
+        self.fits.writeto(fits_file.file, overwrite=True, output_verify="ignore")
+        fits_file.close()
+
+        std = Standardizer.get(fits_file.name, force=KBMODV1)
+        standardized = std.standardize()
+        std2 = KBMODV1(**standardized["meta"])
+        self.assertIsInstance(std2, KBMODV1)
+
+        # clean up resources
+        os.unlink(fits_file.name)
+
+    def test_bitmasking(self):
+        """Test masking with direct config works as expected."""
+        # Assign each flag that exists to a pixel, standardize, then expect
+        # the mask to only contain those pixels that are also in mask_flags.
+        # The grow_kernel is so large by default it would mask nearly the
+        # whole image, so we turn it off.
+        KBMODV1Config.grow_mask = False
+        mask_arr = self.fits["MASK"].data
+        for i, flag in enumerate(KBMODV1Config.bit_flag_map):
+            mask_arr.ravel()[i] = KBMODV1Config.bit_flag_map[flag]
+
+        std = Standardizer.get(self.fits, force=KBMODV1)
+        standardizedMask = std.standardizeMaskImage()
+
+        for mask in standardizedMask:
+            for i, flag in enumerate(KBMODV1Config.bit_flag_map):
+                with self.subTest("Failed to mask expected flag", flag=flag):
+                    if flag in KBMODV1Config.mask_flags:
+                        self.assertEqual(mask.ravel()[i], True)
+                    else:
+                        self.assertEqual(mask.ravel()[i], False)
+
+    def test_threshold_masking(self):
+        """Test brightness threshold masking and config overrides."""
+        # set one pixel below and one above the brightness threshold
+        self.fits["IMAGE"].data[1, 1] = 1
+        self.fits["IMAGE"].data[2, 2] = 3
+
+        conf = StandardizerConfig(
+            {
+                "grow_mask": False,
+                "do_threshold": True,
+                "brightness_threshold": 2,
+            }
+        )
+        std = Standardizer.get(self.fits, force=KBMODV1, config=conf)
+        mask = next(std.standardizeMaskImage())
+
+        self.assertFalse(mask[1, 1])
+        self.assertTrue(mask[2, 2])
+
+    def test_grow_mask(self):
+        """Test mask grows as expected."""
+        # set the central pixel to be masked, then grow that mask to all of
+        # its neighbors.
+        self.fits["MASK"].data[2, 2] = KBMODV1Config.bit_flag_map["BAD"]
+
+        conf = StandardizerConfig({"grow_mask": True, "grow_kernel_shape": (3, 3)})
+        std = Standardizer.get(self.fits, force=KBMODV1, config=conf)
+        mask = next(std.standardizeMaskImage())
+
+        # Note that this is different from masking via Manhattan neighbors,
+        # which could be implemented using the C++ functions in KBMODV1.
+        # The result here is a masked square in the center of the array.
+        self.assertTrue(mask[1:3, 1:3].all())
+        self.assertFalse(mask[:, 0].all())
+        self.assertFalse(mask[0, :].all())
+        self.assertFalse(mask[-1, :].all())
+        self.assertFalse(mask[:, -1].all())
+
+    def test_psf(self):
+        """Test PSFs are created as expected and that per-instance config
+        overrides work."""
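+        # std.config is expected to be a per-instance copy: mutating it
+        # below must not leak into the defaults a fresh standardizer (std2)
+        # picks up; that is asserted further down.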
+        std = Standardizer.get(self.fits, force=KBMODV1)
+
+        psf = next(std.standardizePSF())
+        self.assertIsInstance(psf, PSF)
+        self.assertEqual(psf.get_std(), std.config["psf_std"])
+
+        std.config["psf_std"] = 2
+        psf = next(std.standardizePSF())
+        self.assertIsInstance(psf, PSF)
+        self.assertEqual(psf.get_std(), std.config["psf_std"])
+
+        # make sure we didn't override any of the global defaults by accident
+        std2 = Standardizer.get(self.fits, force=KBMODV1)
+        self.assertNotEqual(std2.config, std.config)
+
+        # Test an iterable psf_std configuration
+        std2.config["psf_std"] = [
+            3,
+        ]
+        psf = next(std2.standardizePSF())
+        self.assertEqual(psf.get_std(), std2.config["psf_std"][0])
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_std_config.py b/tests/test_std_config.py
new file mode 100644
index 00000000..5df4e946
--- /dev/null
+++ b/tests/test_std_config.py
@@ -0,0 +1,47 @@
+import unittest
+
+from kbmod import StandardizerConfig
+
+
+class TestStandardizerConfig(unittest.TestCase):
+    """Test StandardizerConfig."""
+
+    def test_config(self):
+        """Test StandardizerConfig behaves as expected."""
+        # Test init from dict
+        expected = {"a": 1, "b": 2, "c": 3}
+        conf = StandardizerConfig(expected)
+        self.assertEqual(len(conf), 3)
+        self.assertEqual(list(conf.keys()), ["a", "b", "c"])
+        self.assertEqual(list(conf.values()), [1, 2, 3])
+        self.assertTrue("a" in conf)
+        self.assertFalse("noexist" in conf)
+
+        # Test init from kwargs
+        conf2 = StandardizerConfig(a=1, b=2, c=3)
+        self.assertEqual(conf, conf2)
+
+        # Test it behaves dict-like
+        with self.assertRaises(KeyError):
+            _ = conf2["noexist"]
+
+        conf["a"] = 10
+        self.assertEqual(conf["a"], 10)
+        # dict-like merge: the right-hand operand wins, restoring the original values
+        self.assertEqual(conf | conf2, expected)
+        self.assertEqual(list(iter(conf)), ["a", "b", "c"])
+
+        # Test .update method
+        conf.update(conf2)
+        self.assertEqual(conf, conf2)
+
+        conf.update(expected)
+        self.assertEqual(conf, expected)
+
+        conf.update({"a": 11, "b": 12, "c": 13})
+        self.assertDictEqual(conf.toDict(), {"a": 11, "b": 12, "c": 13})
+
+        conf.update(a=1, b=2, c=3)
+        self.assertEqual(conf, conf2)
+
+        with self.assertRaises(TypeError):
+            conf2.update([1, 2, 3])
diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py
index d5231b4f..02cf2ed2 100644
--- a/tests/utils/__init__.py
+++ b/tests/utils/__init__.py
@@ -1 +1,2 @@
+from .mock_fits import *
 from . import utils_for_tests
diff --git a/tests/utils/mock_fits.py b/tests/utils/mock_fits.py
new file mode 100644
index 00000000..ff4b4716
--- /dev/null
+++ b/tests/utils/mock_fits.py
@@ -0,0 +1,277 @@
+import abc
+import warnings
+import tarfile
+
+import numpy as np
+from astropy.table import Table, MaskedColumn
+from astropy.utils.exceptions import AstropyUserWarning
+from astropy.io.fits import HDUList, PrimaryHDU, CompImageHDU, BinTableHDU, Column
+
+from .utils_for_tests import get_absolute_data_path
+
+
+__all__ = [
+    "DECamImdiffFactory",
+    "MockFitsFileFactory",
+]
+
+
+class MockFitsFileFactory(abc.ABC):
+    """Mocks a collection of FITS files given a prescription.
+
+    To mock a FITS file, as if it were opened by AstroPy, we require the raw
+    header data and a simple description of the layout of the FITS headers.
+    For a single file this consists of the ``filename``, ``hdu`` index,
+    header ``keyword``s and header keyword ``values``; mimicking the output
+    format of AstroPy's ``fitsheader`` tool, save for the addition of the
+    ``format`` column.
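+
+    For example, the dumped table might look like (values illustrative):
+
+        filename   hdu  keyword   value  format
+        ---------- ---- --------- ------ ------
+        img1.fits  0    OBSERVAT  CTIO   str
+        img1.fits  1    NAXIS1    2048   int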
+
+    To dump the raw data use the `dump_headers.py` script:
+
+        python dump_headers.py -t ascii.ecsv *.fits > output_file.ecsv
+
+    duplicating the invocation of AstroPy's ``fitsheader`` tool. That file
+    then contains the described columns, with the filename keys repeated for
+    each HDU in the FITS file, and the HDU index repeated for each keyword in
+    the header of that HDU.
+
+    This class then reads, groups, casts and builds the HDUList object as if
+    the file was opened by the `astropy.io.fits.open` function.
+
+    To use the class, inherit from it and implement the `hdu_types` property,
+    which should (possibly construct and) return a list of HDU classes (f.e.
+    `PrimaryHDU`, `CompImageHDU`, `BinTableHDU` etc.). The length of the list
+    must match the number of HDUs within each HDUList in the dumped data. See
+    `DECamImdiffFactory` at the end of this module for a concrete example.
+
+    This class does not construct the ``.data`` attribute.
+
+    Parameters
+    ----------
+    archive_name : `str`
+        Name of the TAR archive in the ``data`` dir containing the raw data.
+    fname : `str`
+        Filename within the archive that contains the wanted raw data.
+    compression : `str`, optional
+        Compression used to compress the archive. Assumed to be ``bz2``.
+    format : `str`, optional
+        Format in which the data was dumped. One of AstroPy's supported
+        ``io`` formats. Defaults to ``ecsv``.
+    """
+
+    # The value types will almost never be anything other than these; the
+    # FITS standard should not allow it. Any further casting done by other
+    # packages is implemented in terms of parsing these types anyhow.
+    lexical_type_map = {
+        "int": int,
+        "str": str,
+        "float": float,
+        "bool": bool,
+    }
+    """Map between type names and types themselves."""
+
+    def __init__(self, archive_name, fname, compression="bz2", format="ascii.ecsv"):
+        archive_path = get_absolute_data_path(archive_name)
+        with tarfile.open(archive_path, f"r:{compression}") as archive:
+            tblfile = archive.extractfile(fname)
+            table = Table.read(tblfile.read().decode(), format=format)
+
+        # sometimes empty strings get serialized as masked values; to cover
+        # that eventuality we substitute an empty string
+        if isinstance(table["value"], MaskedColumn):
+            table["value"].fill_value = ""
+            table["value"] = table["value"].filled()
+
+        # Create HDU groups for easier iteration
+        self.table = table.group_by("filename")
+        self.n_files = len(self.table.groups)
+
+        # Internal counter for the current fits index, so that we may
+        # auto-increment it and avoid returning the same data all the time.
+        self._current = 0
+
+    @property
+    @abc.abstractmethod
+    def hdu_types(self):
+        """A list of HDU types, one for each HDU in the HDUList."""
+        raise NotImplementedError()
+
+    def spoof_data(self, hdul, **kwargs):
+        """Spoof the ``.data`` attributes of the HDUs in the given HDUList."""
+        raise NotImplementedError("This mock factory did not implement data spoofing.")
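+
+    # An illustrative subclass sketch (names assumed; `DECamImdiffFactory`
+    # at the end of this module is the real example):
+    #
+    #     class MyFactory(MockFitsFileFactory):
+    #         def __init__(self):
+    #             super().__init__("my_headers.tar.bz2", "my_headers.ecsv")
+    #
+    #         @property
+    #         def hdu_types(self):
+    #             return [PrimaryHDU, CompImageHDU]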
+
+    # good for debugging, leave it in
+    def get_item(self, group_idx, key, hdu_idx=None):
+        """Get a (key, value, hdu_idx) triplet from the raw data grouped by
+        filename and, optionally, also grouped by HDU index."""
+        group = self.table.groups[group_idx]
+        mask = group["keyword"] == key
+        if hdu_idx is not None:
+            # parenthesized because `&` binds tighter than `==`
+            mask = mask & (group["hdu"] == hdu_idx)
+        return (key, group[mask]["value"], group[mask]["hdu"])
+
+    def get_value(self, group_idx, key, hdu_idx=None):
+        """Get (value, hdu_idx) from the raw data grouped by filename and,
+        optionally, also grouped by HDU index."""
+        return self.get_item(group_idx, key, hdu_idx)[1:]
+
+    def lexical_cast(self, value, format):
+        """Cast a string literal of a type to the type itself. Supports only
+        the builtin Python types.
+
+        Note that, as a consequence, ``bool`` values parsed from non-empty
+        strings such as ``"False"`` will cast to `True`.
+        """
+        if format in self.lexical_type_map:
+            return self.lexical_type_map[format](value)
+        return value
+
+    def get_fits(self, fits_idx, spoof_data=False):
+        """Create a FITS file using the raw data selected by ``fits_idx``.
+
+        **IMPORTANT**: mock factories guarantee returning headers that match
+        the original raw header data. Spoofed data is, however, not
+        guaranteed to respect the original data dimensions and size, just the
+        data layout. The practical implication is that, for example,
+        ``header["NAXIS1"]`` (or 2) does not match ``hdu.data.shape[0]`` (or
+        1); or that writing the file to disk and reading it back is not
+        guaranteed to round-trip the header data anymore, even with
+        ``output_verify`` set to ``ignore``:
+
+            hdul = get_fits(0)
+            hdul.writeto("test.fits", output_verify="ignore")
+            hdul2 = fitsio.open("test.fits", output_verify="ignore")
+            hdul2 == hdul  # --> False
+
+        The change usually affects the NAXIS1/2 cards for all HDU types, but
+        can also alter PGCOUNT, GCOUNT, TFIELDS and NAXIS, as well as
+        endianness.
+        """
+        hdul = HDUList()
+        file_group = self.table.groups[fits_idx % self.n_files]
+        hdu_group = file_group.group_by("hdu")
+
+        # Nearly every following command will issue warnings, but they are
+        # not important - all are HIERARCH card creation warnings for keys
+        # longer than 8 characters.
+        warnings.filterwarnings("ignore", category=AstropyUserWarning)
+        for hdu_idx, HDUClass in enumerate(self.hdu_types):
+            hdu = HDUClass()
+            for k, v, f in hdu_group.groups[hdu_idx]["keyword", "value", "format"]:
+                hdu.header[k] = self.lexical_cast(v, f)
+            hdul.append(hdu)
+        warnings.resetwarnings()
+
+        if spoof_data:
+            hdul = self.spoof_data(hdul)
+
+        return hdul
+
+    def get_range(self, start_idx, end_idx, spoof_data=False):
+        """Get a list of HDUList objects from the specified range. When the
+        range exceeds the number of available serialized headers it wraps
+        back to the start.
+
+        Does not update the current index counter.
+        """
+        if not (start_idx < end_idx):
+            raise ValueError(
+                "Expected starting index to be smaller than the "
+                f"ending index. Got start={start_idx}, end={end_idx}"
+            )
+        files = []
+        for i in range(start_idx, end_idx):
+            files.append(self.get_fits(i % self.n_files, spoof_data))
+        return files
+
+    def get_n(self, n, spoof_data=False):
+        """Get the next n FITS files. Wraps around when the available
+        `n_files` is exceeded. Updates the current index counter."""
+        files = self.get_range(self._current, self._current + n, spoof_data)
+        self._current = (self._current + n) % self.n_files
+        return files
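+
+    # Illustrative: with n_files == 5, get_range(3, 7) returns the mocked
+    # FITS files at indices 3, 4, 0 and 1 -- the indices wrap around via the
+    # modulo arithmetic above.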
+
+    def mock_fits(self, spoof_data=False):
+        """Return a new mocked FITS file.
+
+        The raw data is read sequentially; once exhausted it is reset and
+        starts over again.
+        """
+        self._current = (self._current + 1) % self.n_files
+        return self.get_fits(self._current, spoof_data)
+
+
+class DECamImdiffFactory(MockFitsFileFactory):
+    """Mocks a Vera C. Rubin Science Pipelines imdiff data product, as
+    produced by the Science Pipelines and the procedure described in
+    arXiv:2109.03296.
+
+    The FITS files contain 16 HDUs: one PRIMARY, 3 images (image, mask and
+    variance) and various supporting data, such as the PSF, ArchiveId etc.,
+    stored in `BinTableHDU`s.
+
+    The raw data was exported from the DEEP B1a field, as described in
+    arXiv:2310.03678.
+
+    The raw data for this factory can be found in the
+    ``data/decam_imdiff_headers.tar.bz2`` file, which contains real header
+    data from roughly 60 different FITS files produced by the Rubin Science
+    Pipelines. The used files contained the ``imdiff`` data product.
+
+    Examples
+    --------
+    >>> fitsFactory = DECamImdiffFactory()
+
+    Get the next FITS file from the list. Once all files are exhausted
+    `mock_fits` will start from the beginning again:
+
+    >>> fitsFactory.mock_fits()
+    [astropy.io.fits.hdu.image.... ]
+
+    Get a particular file from the list of all files, in this example the
+    zeroth file in the list. Useful when repeatability and predictability of
+    the results is wanted:
+
+    >>> fitsFactory.get_fits(fits_idx=0)
+    """
+
+    def __init__(self, archive_name="decam_imdiff_headers.ecsv.tar.bz2", fname="decam_imdiff_headers.ecsv"):
+        super().__init__(archive_name, fname)
+
+        # don't let Black have its way with these lines because it's a massacre
+        # fmt: off
+        self.hdus = [PrimaryHDU, ]
+        self.hdus.extend([CompImageHDU, ] * 3)
+        self.hdus.extend([BinTableHDU, ] * 12)
+        # fmt: on
+
+    @property
+    def hdu_types(self):
+        return self.hdus
+
+    def spoof_data(self, hdul):
+        # Mocking FITS files is hard. The data is usually well described by
+        # the header, to the point where it is possible to construct byte
+        # offsets and memmap the FITS, like FITS readers usually do. Spoofing
+        # the data attribute requires us to respect the data type, size and
+        # shape. It is of course silly to have to write, f.e., a 10 by 2k
+        # BinTable just because the header says so. We get by mostly because
+        # our Standardizers don't need to read those HDUs, so AstroPy never
+        # raises an error. It is possible that one of them will eventually
+        # want to read the PSF HDU, which we will then have to spoof
+        # correctly.
+        # On top of that, AstroPy will, rightfully, be very opinionated about
+        # the metadata of the data matching the data itself, and will throw
+        # warnings and errors. We have to match the data type using dtypes,
+        # because values default to a 64-bit representation in pure Python,
+        # as well as the number of columns and rows - even if we leave the
+        # HDU data itself empty! Of course, storing the data takes too much
+        # space, so we have to reverse-engineer it, on a per-standardizer
+        # level, taking care we don't hit any of these roadblocks.
+        empty_array = np.zeros((5, 5), np.float32)
+        hdul["IMAGE"].data = empty_array
+        hdul["VARIANCE"].data = empty_array
+        hdul["MASK"].data = empty_array.astype(np.int32)
+
+        # These are the 12 BinTableHDUs we're not using at the moment
+        for i, hdu in enumerate(hdul[4:]):
+            nrows = hdu.header["TFIELDS"]
+            hdul[i + 4] = BinTableHDU(data=np.zeros((nrows,), dtype=hdu.data.dtype), header=hdu.header)
+
+        return hdul
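+
+
+# An illustrative end-to-end sketch of how these factories feed the tests
+# (assumes kbmod and the test utilities are importable; for these mocks
+# Standardizer.get resolves to KBMODV1, see TestKBMODV1):
+#
+#     from utils import DECamImdiffFactory
+#     from kbmod import Standardizer
+#
+#     factory = DECamImdiffFactory()
+#     hdul = factory.mock_fits(spoof_data=True)  # a mocked Rubin imdiff HDUList
+#     std = Standardizer.get(hdul)               # resolves to KBMODV1
+#     standardized = std.standardize()           # "meta", "science", "mask", ...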