From 8b4a87c8f0ee287087b17f17de6fd28097f57c06 Mon Sep 17 00:00:00 2001 From: Guy Nir <37179063+guynir42@users.noreply.github.com> Date: Thu, 20 Jun 2024 23:55:55 -0700 Subject: [PATCH] Background model (#308) Add Background as an Image data product, add backgrounding and Backgrounder as a configurable pipeline step. --- ...6_10_1132-a375526c8260_background_table.py | 86 ++++ default_config.yaml | 19 +- docs/troubleshooting_sqla.md | 147 ++++++ improc/alignment.py | 98 +++- improc/photometry.py | 37 +- improc/simulator.py | 5 +- models/background.py | 433 ++++++++++++++++++ models/base.py | 4 + models/cutouts.py | 2 +- models/enums_and_bitflags.py | 58 ++- models/exposure.py | 1 + models/image.py | 106 +++-- models/instrument.py | 2 + models/measurements.py | 35 +- models/provenance.py | 1 + models/psf.py | 22 +- models/reference.py | 22 +- models/report.py | 54 ++- models/source_list.py | 42 +- models/world_coordinates.py | 27 +- models/zero_point.py | 20 +- pipeline/backgrounding.py | 169 +++++++ pipeline/coaddition.py | 32 +- pipeline/cutting.py | 4 +- pipeline/data_store.py | 112 ++++- pipeline/detection.py | 161 +++++-- pipeline/measuring.py | 40 +- pipeline/subtraction.py | 13 +- pipeline/top_level.py | 53 ++- requirements.txt | 2 +- tests/conftest.py | 1 - tests/fixtures/decam.py | 5 +- tests/fixtures/pipeline_objects.py | 98 +++- tests/fixtures/ptf.py | 41 +- tests/fixtures/simulated.py | 2 +- tests/improc/test_alignment.py | 19 +- tests/improc/test_zogy.py | 3 +- tests/models/test_background.py | 102 +++++ tests/models/test_measurements.py | 27 +- tests/models/test_objects.py | 2 +- tests/models/test_provenance.py | 66 +++ tests/models/test_reports.py | 92 ++-- tests/models/test_source_list.py | 15 +- tests/pipeline/test_backgrounding.py | 52 +++ tests/pipeline/test_coaddition.py | 92 ++-- tests/pipeline/test_extraction.py | 147 ++---- tests/pipeline/test_measuring.py | 51 +-- tests/pipeline/test_photo_cal.py | 2 +- tests/pipeline/test_pipeline.py | 48 +- tests/pipeline/test_subtraction.py | 6 +- util/util.py | 3 +- 51 files changed, 2133 insertions(+), 548 deletions(-) create mode 100644 alembic/versions/2024_06_10_1132-a375526c8260_background_table.py create mode 100644 docs/troubleshooting_sqla.md create mode 100644 models/background.py create mode 100644 pipeline/backgrounding.py create mode 100644 tests/models/test_background.py create mode 100644 tests/pipeline/test_backgrounding.py diff --git a/alembic/versions/2024_06_10_1132-a375526c8260_background_table.py b/alembic/versions/2024_06_10_1132-a375526c8260_background_table.py new file mode 100644 index 00000000..4b5d328f --- /dev/null +++ b/alembic/versions/2024_06_10_1132-a375526c8260_background_table.py @@ -0,0 +1,86 @@ +"""background table + +Revision ID: a375526c8260 +Revises: a7dde2327dde +Create Date: 2024-06-10 11:32:39.717922 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = 'a375526c8260' +down_revision = 'a7dde2327dde' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table('backgrounds', + sa.Column('_format', sa.SMALLINT(), nullable=False), + sa.Column('_method', sa.SMALLINT(), nullable=False), + sa.Column('image_id', sa.BigInteger(), nullable=False), + sa.Column('value', sa.Float(), nullable=False), + sa.Column('noise', sa.Float(), nullable=False), + sa.Column('provenance_id', sa.String(), nullable=False), + sa.Column('created_at', sa.DateTime(), nullable=False), + sa.Column('modified', sa.DateTime(), nullable=False), + sa.Column('id', sa.BigInteger(), autoincrement=True, nullable=False), + sa.Column('filepath_extensions', postgresql.ARRAY(sa.Text(), zero_indexes=True), nullable=True), + sa.Column('md5sum', sa.UUID(), nullable=True), + sa.Column('md5sum_extensions', postgresql.ARRAY(sa.UUID(), zero_indexes=True), nullable=True), + sa.Column('filepath', sa.Text(), nullable=False), + sa.Column('_bitflag', sa.BIGINT(), nullable=False), + sa.Column('description', sa.Text(), nullable=True), + sa.Column('_upstream_bitflag', sa.BIGINT(), nullable=False), + sa.ForeignKeyConstraint(['image_id'], ['images.id'], name='backgrounds_image_id_fkey', ondelete='CASCADE'), + sa.ForeignKeyConstraint(['provenance_id'], ['provenances.id'], name='backgrounds_provenance_id_fkey', ondelete='CASCADE'), + sa.PrimaryKeyConstraint('id') + ) + op.create_index('backgrounds_image_id_provenance_index', 'backgrounds', ['image_id', 'provenance_id'], unique=True) + op.create_index(op.f('ix_backgrounds__bitflag'), 'backgrounds', ['_bitflag'], unique=False) + op.create_index(op.f('ix_backgrounds__upstream_bitflag'), 'backgrounds', ['_upstream_bitflag'], unique=False) + op.create_index(op.f('ix_backgrounds_created_at'), 'backgrounds', ['created_at'], unique=False) + op.create_index(op.f('ix_backgrounds_filepath'), 'backgrounds', ['filepath'], unique=True) + op.create_index(op.f('ix_backgrounds_id'), 'backgrounds', ['id'], unique=False) + op.create_index(op.f('ix_backgrounds_image_id'), 'backgrounds', ['image_id'], unique=False) + op.create_index(op.f('ix_backgrounds_noise'), 'backgrounds', ['noise'], unique=False) + op.create_index(op.f('ix_backgrounds_provenance_id'), 'backgrounds', ['provenance_id'], unique=False) + op.create_index(op.f('ix_backgrounds_value'), 'backgrounds', ['value'], unique=False) + op.add_column('source_lists', sa.Column('inf_aper_num', sa.SMALLINT(), nullable=True)) + op.add_column('source_lists', sa.Column('best_aper_num', sa.SMALLINT(), nullable=True)) + op.drop_column('source_lists', '_inf_aper_num') + + op.add_column('measurements', sa.Column('bkg_mean', sa.REAL(), nullable=False)) + op.add_column('measurements', sa.Column('bkg_std', sa.REAL(), nullable=False)) + op.add_column('measurements', sa.Column('bkg_pix', sa.REAL(), nullable=False)) + op.drop_column('measurements', 'background') + op.drop_column('measurements', 'background_err') + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.add_column('measurements', sa.Column('background_err', sa.REAL(), autoincrement=False, nullable=False)) + op.add_column('measurements', sa.Column('background', sa.REAL(), autoincrement=False, nullable=False)) + op.drop_column('measurements', 'bkg_pix') + op.drop_column('measurements', 'bkg_std') + op.drop_column('measurements', 'bkg_mean') + + op.add_column('source_lists', sa.Column('_inf_aper_num', sa.SMALLINT(), autoincrement=False, nullable=True)) + op.drop_column('source_lists', 'best_aper_num') + op.drop_column('source_lists', 'inf_aper_num') + op.drop_index(op.f('ix_backgrounds_value'), table_name='backgrounds') + op.drop_index(op.f('ix_backgrounds_provenance_id'), table_name='backgrounds') + op.drop_index(op.f('ix_backgrounds_noise'), table_name='backgrounds') + op.drop_index(op.f('ix_backgrounds_image_id'), table_name='backgrounds') + op.drop_index(op.f('ix_backgrounds_id'), table_name='backgrounds') + op.drop_index(op.f('ix_backgrounds_filepath'), table_name='backgrounds') + op.drop_index(op.f('ix_backgrounds_created_at'), table_name='backgrounds') + op.drop_index(op.f('ix_backgrounds__upstream_bitflag'), table_name='backgrounds') + op.drop_index(op.f('ix_backgrounds__bitflag'), table_name='backgrounds') + op.drop_index('backgrounds_image_id_provenance_index', table_name='backgrounds') + op.drop_table('backgrounds') + # ### end Alembic commands ### diff --git a/default_config.yaml b/default_config.yaml index 4f676164..68658624 100644 --- a/default_config.yaml +++ b/default_config.yaml @@ -83,10 +83,21 @@ preprocessing: extraction: sources: + method: sextractor measure_psf: true + apertures: [1.0, 2.0, 3.0, 5.0] + inf_aper_num: -1 + best_aper_num: 0 + aperunit: fwhm + separation_fwhm: 1.0 threshold: 3.0 - method: sextractor - + subtraction: false + bg: + format: map + method: sep + poly_order: 1 + sep_box_size: 128 + sep_filt_size: 3 wcs: cross_match_catalog: gaia_dr3 solution_method: scamp @@ -94,7 +105,6 @@ extraction: mag_range_catalog: 4.0 min_catalog_stars: 50 max_sources_to_use: [2000, 1000, 500, 200] - zp: cross_match_catalog: gaia_dr3 max_catalog_mag: [20.0] @@ -118,7 +128,7 @@ cutting: measuring: annulus_radii: [10, 15] annulus_units: pixels - chosen_aperture: 0 + use_annulus_for_centroids: true analytical_cuts: ['negatives', 'bad pixels', 'offsets', 'filter bank'] outlier_sigma: 3.0 bad_pixel_radius: 3.0 @@ -175,6 +185,7 @@ coaddition: measure_psf: true threshold: 3.0 method: sextractor + background_method: zero # The following are used to override the regular astrometric calibration parameters wcs: cross_match_catalog: gaia_dr3 diff --git a/docs/troubleshooting_sqla.md b/docs/troubleshooting_sqla.md new file mode 100644 index 00000000..d754f28a --- /dev/null +++ b/docs/troubleshooting_sqla.md @@ -0,0 +1,147 @@ +## Troubleshooting SQLAlchemy + +Here is a growing list of common issues and solutions for SQLAlchemy. + +#### Adding this object again causes a unique constraint violation + +This is a common issue when you are trying to add an object to the session that is already on the DB. +Instead, use merge, and make sure to assign the merged object to a variable (often with the same name) +and keep using that. There's no real advantage to using `session.add()` over `session.merge()`. 
+
+Example:
+
+```python
+obj = session.merge(obj)
+```
+
+#### Related objects get added to the session (and database) when they are not supposed to
+
+This is a harder one, where a complex web of relationships causes SQLAlchemy to add objects to the session
+when they are not supposed to be added.
+This happens when you `session.merge()` an object, not just on `session.add()`.
+It is especially tricky when you are trying to delete a parent: you merge it first,
+and then end up adding the children instead.
+Usually the relationship will merge and then delete the children using cascades,
+but some complex relationships may not work that way.
+If you notice things are getting added when they shouldn't, check the session state before committing/flushing.
+
+The places to look are:
+```python
+session.identity_map.keys()
+session.new
+session.dirty
+session.deleted
+```
+
+If unwanted objects appear there, try to `session.expunge()` them before committing, or, if they are persistent,
+you may need to `session.delete()` them instead.
+
+#### Double adding a related object through cascades
+
+Sometimes when a child is merged (or added) into a session, the parent is not automatically added.
+Then, when the parent is added to the session on its own, it gets added as a new object, which can trigger
+unique violations (or, worse, just add duplicates).
+
+The root of this problem is that the child object is merged without the parent.
+Remember that a merged object is a new copy of the original, only connected to the session.
+If you don't cascade the merge to the parent, you can't just assign the parent to the new object.
+The parent object still keeps a reference to the old child object, and that one is not on the session.
+Instead, make sure the merged child is assigned a merged parent, and that the parent's relationship
+points to the merged child rather than the old, pre-merge copy.
+
+#### Cannot access related children when parent is not in the session
+
+This happens when a parent object is not in the session, but you want to access its children.
+The error message is usually something like this:
+
+```
+sqlalchemy.orm.exc.DetachedInstanceError: Parent instance is not bound to a Session;
+lazy load operation of attribute 'children' cannot proceed
+```
+
+This happens under three possible circumstances:
+1. The relationship is lazy loaded (which we generally try to avoid).
+   Check that the relationship definition has `lazy='selectin'`.
+2. The parent object was loaded as a related object itself, and that loading did not recursively load the children.
+   Most objects will recursively load related objects of related objects, but in some cases this doesn't work,
+   in particular when there's a many-to-many relationship via an association table (e.g., Provenance.upstreams).
+   This is fixed by setting `join_depth=1` or higher, as documented
+   [here](https://docs.sqlalchemy.org/en/20/orm/self_referential.html#configuring-self-referential-eager-loading).
+3. The session has rolled back, or committed (the latter only if you've changed to `expire_on_commit=True`).
+   We usually have `expire_on_commit=False`, so that objects do not get expired when the session is committed.
+   However, when the session is rolled back, all objects are expired. That means you cannot use related objects,
+   or even regular attributes, after a rollback. In most cases, a rollback is due to some crash, so having some
+   errors accessing attributes/relationships while handling exceptions and "gracefully" exiting the program is expected,
+   and doesn't require too much attention.
+   If, however, you explicitly called a rollback, you should expect to have expired objects,
+   and should go ahead and `session.refresh()` all the objects you need to use.
+
+#### Parent not in session, updates along the children relationship are not flushed to the database (Warning only)
+
+This is a warning that tells you that even though you added or deleted a child object,
+the relationship cannot automatically update the object in the database, because the parent
+is not connected to a session.
+
+This is sometimes important, but often harmless. For example, if you have deleted the parent
+and then go on to remove its children, it makes little difference that the relationship
+is no longer emitting SQL changes, because the parent is going to be deleted anyway.
+
+
+#### `When initializing mapper Mapper[...], expression '...' failed to locate a name`
+
+This happens when a related object's class has not been imported by the time the relationship
+needs to be instantiated.
+
+When two classes, A and B, are related to each other, we would see a definition like this:
+
+```python
+from sqlalchemy import Column, ForeignKey, Integer
+from sqlalchemy.orm import relationship
+
+from models.base import Base
+
+class A(Base):
+    __tablename__ = 'a'
+    id = Column(Integer, primary_key=True)
+    b_id = Column(Integer, ForeignKey('b.id'))
+    b = relationship('B')
+
+class B(Base):
+    __tablename__ = 'b'
+    id = Column(Integer, primary_key=True)
+    a_id = Column(Integer, ForeignKey('a.id'))
+    a = relationship('A')
+```
+
+Notice that the `relationship` function is called with a string argument.
+This is because the class `B` is not yet defined when the class `A` is defined.
+This solves a "chicken and egg" problem by promising the mapper that, by the time
+the relationships are instantiated, both classes will have been imported.
+
+If some of the related objects are in a different file (module) and that file
+is not imported by any of the code you are running, you will get the error above.
+
+This usually happens in scripts and parallel pipelines that only use a subset of the classes.
+To fix it, simply import the module containing the missing class at the beginning of the script.
+
+
+#### Changing the primary key of an object causes an update instead of a new object
+
+For objects that don't have an auto-incrementing primary key (e.g., Provenance),
+the user is in control of the value that goes into the primary key.
+Sometimes the user changes this value, e.g., when a Provenance gets new parameters
+and the `update_id()` method is called.
+
+If the object is already in the session and the primary key is changed, SQLAlchemy
+will update the existing row in the database instead of creating a new one.
+This removes the old object and may cause problems with objects that relate to
+that row in the table.
+
+Make sure to detach your object, or make a brand new one and copy properties over
+to the new instance, before merging it back into the session as a new object.
+
+
+#### Deadlocks when querying the database
+
+This can occur when an internal session is querying the same objects
+that an external session is using.
+In general, you should not open an internal session while a different one is open;
+instead, pass the session as an argument into the lower scope so all functions use the same session
+(see the sketch below).
+
+If the app freezes, check for a deadlock:
+go into the DB and run `SELECT * FROM pg_locks;` to see if there are many locks.
+
+Sometimes running `SELECT pg_cancel_backend(pid) FROM pg_locks;` will free the lock.
+Otherwise, try restarting the PostgreSQL service.
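+
+As a minimal sketch of that pattern (the function names below are hypothetical; `SmartSession`
+is the session context manager used throughout this codebase), accept an optional session and
+pass it down, so a nested call reuses the caller's session instead of opening a second one:
+
+```python
+from models.base import SmartSession
+
+
+def save_product(obj, session=None):
+    # Reuse the caller's session if one was given; only open a new one
+    # when called standalone. This avoids the nested-session deadlocks
+    # described above.
+    with SmartSession(session) as session:
+        obj = session.merge(obj)
+        return obj
+
+
+def run_step(obj, session=None):
+    with SmartSession(session) as session:
+        obj = save_product(obj, session=session)  # same session all the way down
+        session.commit()
+        return obj
+```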
diff --git a/improc/alignment.py b/improc/alignment.py index aae18a27..5eca7df4 100644 --- a/improc/alignment.py +++ b/improc/alignment.py @@ -3,16 +3,16 @@ import random import time import subprocess +import warnings import numpy as np import astropy.table import astropy.wcs.utils -from astropy.io import fits from util import ldac from util.exceptions import SubprocessFailure -from util.util import read_fits_image +from util.util import read_fits_image, save_fits_image_file from util.logger import SCLogger import improc.scamp import improc.tools @@ -21,6 +21,7 @@ from models.provenance import Provenance from models.image import Image from models.source_list import SourceList +from models.background import Background from models.enums_and_bitflags import string_to_bitflag, flag_image_bits_inverse from pipeline.data_store import DataStore @@ -216,13 +217,15 @@ def _align_swarp( self, image, target, sources, target_sources ): tmptargetcat = tmppath / f'{tmpname}_target.sources.fits' tmpim = tmppath / f'{tmpname}_image.fits' tmpflags = tmppath / f'{tmpname}_flags.fits' + tmpbg = tmppath / f'{tmpname}_bg.fits' outim = tmppath / f'{tmpname}_warped.image.fits' outwt = tmppath / f'{tmpname}_warped.weight.fits' outfl = tmppath / f'{tmpname}_warped.flags.fits' + outbg = tmppath / f'{tmpname}_warped.bg.fits' outimhead = tmppath / f'{tmpname}_warped.image.head' outflhead = tmppath / f'{tmpname}_warped.flags.head' - + outbghead = tmppath / f'{tmpname}_warped.bg.head' swarp_vmem_dir = tmppath /f'{tmpname}_vmem' # Writing this all out because several times I've looked at code @@ -325,6 +328,7 @@ def _align_swarp( self, image, target, sources, target_sources ): hdr['NAXIS2'] = target.data.shape[0] hdr.tofile( outimhead ) hdr.tofile( outflhead ) + hdr.tofile( outbghead ) # Warp the image # TODO : support single image with image, weight, flags in @@ -347,14 +351,18 @@ def _align_swarp( self, image, target, sources, target_sources ): # putting in a symbolic link for the full FITS, instead of # copying the FITS data as here. Look into that.) - with fits.open( impaths[imdex], mode='readonly' ) as hdul: - improc.tools.strip_wcs_keywords( hdul[0].header ) - hdul[0].header.update( imagewcs.wcs.to_header() ) - hdul.writeto( tmpim ) - with fits.open( impaths[fldex], mode='readonly' ) as hdul: - improc.tools.strip_wcs_keywords( hdul[0].header ) - hdul[0].header.update( imagewcs.wcs.to_header() ) - hdul.writeto( tmpflags ) + hdr = image.header.copy() + improc.tools.strip_wcs_keywords(hdr) + hdr.update(imagewcs.wcs.to_header()) + if image.bg is None: + # to avoid this warning, consider adding a "zero" background object to the image + warnings.warn("No background image found. Using original image data.") + data = image.data + else: + data = image.data_bgsub + + save_fits_image_file(tmpim, data, hdr, extname=None, single_file=False) + save_fits_image_file(tmpflags, image.flags, hdr, extname=None, single_file=False) swarp_vmem_dir.mkdir( exist_ok=True, parents=True ) @@ -402,8 +410,9 @@ def _align_swarp( self, image, target, sources, target_sources ): warpedim.data, warpedim.header = read_fits_image( outim, output="both" ) # TODO: either make this not a hardcoded header value, or verify - # that we've constructed these images to have these hardcoded values - # (which would probably be a mistake, since it a priori assumes two amps). + # that we've constructed these images to have these hardcoded values + # (which would probably be a mistake, since it a priori assumes two amps). 
+ # Issue #216 for att in ['SATURATA', 'SATURATB']: if att in image.header: warpedim.header[att] = image.header[att] @@ -412,6 +421,43 @@ def _align_swarp( self, image, target, sources, target_sources ): warpedim.flags = read_fits_image(outfl) warpedim.flags = np.rint(warpedim.flags).astype(np.uint16) # convert back to integers + # warp the background noise image: + if image.bg is not None: + bg = Background( + value=0, + noise=image.bg.noise, + format=image.bg.format, + method=image.bg.method, + _bitflag=image.bg._bitflag, + image=warpedim, + provenance=image.bg.provenance, + provenance_id=image.bg.provenance_id, + ) + # TODO: what about polynomial model backgrounds? + if image.bg.format == 'map': + save_fits_image_file(tmpbg, image.bg.variance, hdr, extname=None, single_file=False) + command = ['swarp', tmpbg, + '-IMAGEOUT_NAME', outbg, + '-SUBTRACT_BACK', 'N', + '-RESAMPLE_DIR', FileOnDiskMixin.temp_path, + '-VMEM_DIR', swarp_vmem_dir, + # '-VMEM_DIR', '/tmp', + '-VMEM_MAX', '1024', + '-MEM_MAX', '1024', + '-WRITE_XML', 'N'] + + t0 = time.perf_counter() + res = subprocess.run(command, capture_output=True, timeout=self.pars.swarp_timeout) + t1 = time.perf_counter() + SCLogger.debug(f"swarp of background took {t1 - t0:.2f} seconds") + if res.returncode != 0: + raise SubprocessFailure(res) + + bg.variance = read_fits_image(outbg, output='data') + bg.counts = np.zeros_like(bg.variance) + + warpedim.bg = bg + # re-calculate the source list and PSF for the warped image extractor = Detector() extractor.pars.override(sources.provenance.parameters['sources'], ignore_addons=True) @@ -449,11 +495,14 @@ def _align_swarp( self, image, target, sources, target_sources ): tmptargetcat.unlink( missing_ok=True ) tmpim.unlink( missing_ok=True ) tmpflags.unlink( missing_ok=True ) + tmpbg.unlink( missing_ok=True ) outim.unlink( missing_ok=True ) outwt.unlink( missing_ok=True ) outfl.unlink( missing_ok=True ) + outbg.unlink( missing_ok=True ) outimhead.unlink( missing_ok=True ) outflhead.unlink( missing_ok=True ) + outbghead.unlink( missing_ok=True ) for f in swarp_vmem_dir.iterdir(): f.unlink() swarp_vmem_dir.rmdir() @@ -506,10 +555,33 @@ def run( self, source_image, target_image ): if target_image == source_image: warped_image = Image.copy_image( source_image ) warped_image.type = 'Warped' + if source_image.bg is None: + warnings.warn("No background image found. Using original image data.") + warped_image.data = source_image.data + warped_image.bg = None # this will be a problem later if you need to coadd the images! + else: + warped_image.data = source_image.data_bgsub + # make a copy of the background object but with zero mean + bg = Background( + value=0, + noise=source_image.bg.noise, + format=source_image.bg.format, + method=source_image.bg.method, + _bitflag=source_image.bg._bitflag, + image=warped_image, + provenance=source_image.bg.provenance, + provenance_id=source_image.bg.provenance_id, + ) + if bg.format == 'map': + bg.counts = np.zeros_like(warped_image.data) + bg.variance = source_image.bg.variance + warped_image.bg = bg + warped_image.psf = source_image.psf warped_image.zp = source_image.zp warped_image.wcs = source_image.wcs # TODO: what about SourceList? + # TODO: should these objects be copies of the products, or references to the same objects? 
else: # Do the warp if self.pars.method == 'swarp': SCLogger.debug( 'Aligning with swarp' ) diff --git a/improc/photometry.py b/improc/photometry.py index 4ea12415..3c9918e3 100644 --- a/improc/photometry.py +++ b/improc/photometry.py @@ -251,6 +251,7 @@ def iterative_cutouts_photometry( normalizations=norms, background=background, variance=variance, + n_pix_bg=nandata.size, offset_x=cx, offset_y=cy, moment_xx=cxx, @@ -272,7 +273,7 @@ def iterative_cutouts_photometry( # for each radius, do 1-3 rounds of repositioning the centroid for i in range(iterations): - flux, area, background, variance, norm, cx, cy, cxx, cyy, cxy, failure = calc_at_position( + flux, area, background, variance, n_pix_bg, norm, cx, cy, cxx, cyy, cxy, failure = calc_at_position( nandata, r, annulus, xgrid, ygrid, cx, cy, local_bg=local_bg, full=False # reposition only! ) @@ -296,7 +297,7 @@ def iterative_cutouts_photometry( # go over each radius again and this time get all outputs (e.g., cxx) using the best centroid for j, r in enumerate(radii): - flux, area, background, variance, norm, cx, cy, cxx, cyy, cxy, failure = calc_at_position( + flux, area, background, variance, n_pix_bg, norm, cx, cy, cxx, cyy, cxy, failure = calc_at_position( nandata, r, annulus, @@ -323,6 +324,7 @@ def iterative_cutouts_photometry( photometry['areas'] = areas[::-1] # return radii and areas in increasing order photometry['background'] = background photometry['variance'] = variance + photometry['n_pix_bg'] = n_pix_bg photometry['normalizations'] = norms[::-1] # return radii and areas in increasing order photometry['offset_x'] = best_cx photometry['offset_y'] = best_cy @@ -398,6 +400,8 @@ def calc_at_position(data, radius, annulus, xgrid, ygrid, cx, cy, local_bg=True, The background level. variance: float The variance of the background. + n_pix_bg: float + Number of pixels in the background annulus. norm: float The normalization factor for the flux error (this is the sqrt of the sum of squares of the aperture mask). @@ -418,9 +422,9 @@ def calc_at_position(data, radius, annulus, xgrid, ygrid, cx, cy, local_bg=True, If True, it flags to the outer scope to stop the iterative process. """ - flux = area = background = variance = norm = cxx = cyy = cxy = 0 + flux = area = background = variance = n_pix_bg = norm = cxx = cyy = cxy = 0 if np.all(np.isnan(data)): - return flux, area, background, variance, norm, cx, cy, cxx, cyy, cxy, True + return flux, area, background, variance, n_pix_bg, norm, cx, cy, cxx, cyy, cxy, True # make a circle-mask based on the centroid position if not np.isfinite(cx) or not np.isfinite(cy): @@ -429,14 +433,14 @@ def calc_at_position(data, radius, annulus, xgrid, ygrid, cx, cy, local_bg=True, # get a circular mask mask = get_circle(radius=radius, imsize=data.shape[0], soft=soft).get_image(cx, cy) if np.nansum(mask) == 0: - return flux, area, background, variance, norm, cx, cy, cxx, cyy, cxy, True + return flux, area, background, variance, n_pix_bg, norm, cx, cy, cxx, cyy, cxy, True masked_data = data * mask flux = np.nansum(masked_data) # total flux, not per pixel! area = np.nansum(mask) # save the number of pixels in the aperture denominator = flux - masked_data_bg = masked_data + masked_data_bgsub = masked_data # get an offset annulus to get a local background estimate if full or local_bg: @@ -446,7 +450,7 @@ def calc_at_position(data, radius, annulus, xgrid, ygrid, cx, cy, local_bg=True, annulus_map[annulus_map == 0.] 
= np.nan # flag pixels outside annulus as nan if np.nansum(annulus_map) == 0: # this can happen if annulus is too large - return flux, area, background, variance, norm, cx, cy, cxx, cyy, cxy, True + return flux, area, background, variance, n_pix_bg, norm, cx, cy, cxx, cyy, cxy, True annulus_map_sum = np.nansum(annulus_map) if annulus_map_sum == 0 or np.all(np.isnan(annulus_map)): @@ -462,26 +466,27 @@ def calc_at_position(data, radius, annulus, xgrid, ygrid, cx, cy, local_bg=True, if local_bg: # update these to use the local background denominator = (flux - background * area) - masked_data_bg = (data - background) * mask + masked_data_bgsub = (data - background) * mask if denominator == 0: # this should only happen in pathological cases - return flux, area, background, variance, norm, cx, cy, cxx, cyy, cxy, True + return flux, area, background, variance, n_pix_bg, norm, cx, cy, cxx, cyy, cxy, True if not fixed: # update the centroids - cx = np.nansum(xgrid * masked_data_bg) / denominator - cy = np.nansum(ygrid * masked_data_bg) / denominator + cx = np.nansum(xgrid * masked_data_bgsub) / denominator + cy = np.nansum(ygrid * masked_data_bgsub) / denominator # check that we got reasonable values! if np.isnan(cx) or abs(cx) > data.shape[1] / 2 or np.isnan(cy) or abs(cy) > data.shape[0] / 2: - return flux, area, background, variance, norm, cx, cy, cxx, cyy, cxy, True + return flux, area, background, variance, n_pix_bg, norm, cx, cy, cxx, cyy, cxy, True if full: # update the second moments - cxx = np.nansum((xgrid - cx) ** 2 * masked_data_bg) / denominator - cyy = np.nansum((ygrid - cy) ** 2 * masked_data_bg) / denominator - cxy = np.nansum((xgrid - cx) * (ygrid - cy) * masked_data_bg) / denominator + cxx = np.nansum((xgrid - cx) ** 2 * masked_data_bgsub) / denominator + cyy = np.nansum((ygrid - cy) ** 2 * masked_data_bgsub) / denominator + cxy = np.nansum((xgrid - cx) * (ygrid - cy) * masked_data_bgsub) / denominator - return flux, area, background, variance, norm, cx, cy, cxx, cyy, cxy, False + n_pix_bg = annulus_map_sum + return flux, area, background, variance, n_pix_bg, norm, cx, cy, cxx, cyy, cxy, False if __name__ == '__main__': diff --git a/improc/simulator.py b/improc/simulator.py index 02de229b..206713c2 100644 --- a/improc/simulator.py +++ b/improc/simulator.py @@ -7,12 +7,15 @@ from util.logger import SCLogger - +# this is commented out as there are some problems installing it +# consider replacing with https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.moyal.html +# if this turns out to be important enough (it is not a main part of the simulator) # import pylandau from pipeline.parameters import Parameters from improc.tools import make_gaussian + class SimPars(Parameters): def __init__(self, **kwargs): diff --git a/models/background.py b/models/background.py new file mode 100644 index 00000000..59803aad --- /dev/null +++ b/models/background.py @@ -0,0 +1,433 @@ +import os + +import numpy as np + +import h5py + +import sqlalchemy as sa +import sqlalchemy.orm as orm +from sqlalchemy.ext.hybrid import hybrid_property +from sqlalchemy.schema import UniqueConstraint + +from models.base import Base, SeeChangeBase, SmartSession, AutoIDMixin, FileOnDiskMixin, HasBitFlagBadness +from models.image import Image + +from models.enums_and_bitflags import BackgroundFormatConverter, BackgroundMethodConverter, bg_badness_inverse + + +class Background(Base, AutoIDMixin, FileOnDiskMixin, HasBitFlagBadness): + __tablename__ = 'backgrounds' + + __table_args__ = ( + 
UniqueConstraint('image_id', 'provenance_id', name='_bg_image_provenance_uc'), + ) + + _format = sa.Column( + sa.SMALLINT, + nullable=False, + default=BackgroundFormatConverter.convert('scalar'), + doc='Format of the Background model. Can include scalar, map, or polynomial. ' + ) + + @hybrid_property + def format(self): + return BackgroundFormatConverter.convert(self._format) + + @format.inplace.expression + @classmethod + def format(cls): + return sa.case(BackgroundFormatConverter.dict, value=cls._format) + + @format.inplace.setter + def format(self, value): + self._format = BackgroundFormatConverter.convert(value) + + _method = sa.Column( + sa.SMALLINT, + nullable=False, + default=BackgroundMethodConverter.convert('zero'), + doc='Method used to calculate the background. ' + 'Can be an algorithm like "sep", or "zero" for an image that was already background subtracted. ', + ) + + @hybrid_property + def method(self): + return BackgroundMethodConverter.convert(self._method) + + @method.inplace.expression + @classmethod + def method(cls): + return sa.case(BackgroundMethodConverter.dict, value=cls._method) + + @method.inplace.setter + def method(self, value): + self._method = BackgroundMethodConverter.convert(value) + + image_id = sa.Column( + sa.ForeignKey('images.id', ondelete='CASCADE', name='backgrounds_image_id_fkey'), + nullable=False, + index=True, + doc="ID of the image for which this is the background." + ) + + image = orm.relationship( + 'Image', + cascade='save-update, merge, refresh-expire, expunge', + passive_deletes=True, + lazy='selectin', + doc="Image for which this is the background." + ) + + value = sa.Column( + sa.Float, + index=True, + nullable=False, + doc="Value of the background level (in units of counts), as a best representative value for the entire image." + ) + + noise = sa.Column( + sa.Float, + index=True, + nullable=False, + doc="Noise RMS of the background (in units of counts), as a best representative value for the entire image." + ) + + provenance_id = sa.Column( + sa.ForeignKey('provenances.id', ondelete="CASCADE", name='backgrounds_provenance_id_fkey'), + nullable=False, + index=True, + doc=( + "ID of the provenance of this Background object. " + "The provenance will contain a record of the code version" + "and the parameters used to produce this Background object." + ) + ) + + provenance = orm.relationship( + 'Provenance', + cascade='save-update, merge, refresh-expire, expunge', + lazy='selectin', + doc=( + "Provenance of this Background object. " + "The provenance will contain a record of the code version" + "and the parameters used to produce this Background object." + ) + ) + + __table_args__ = ( + sa.Index( 'backgrounds_image_id_provenance_index', 'image_id', 'provenance_id', unique=True ), + ) + + @property + def image_shape(self): + if self._image_shape is None and self.filepath is not None: + self.load() + return self._image_shape + + @image_shape.setter + def image_shape(self, value): + self._image_shape = value + + @property + def counts(self): + """The background counts data for this object. + + This will either be a map that is loaded directly from file, + or an interpolated map based on the polynomial or scalar value + mapped onto the image shape. + + This is a best-estimate of the sky counts, ignoring as best as + possible the sources in the sky, and looking only at the smoothed + background level. 
+ """ + if self._counts_data is None and self.filepath is not None: + self.load() + return self._counts_data + + @counts.setter + def counts(self, value): + self._counts_data = value + + @property + def variance(self): + """The background variance data for this object. + + This will either be a map that is loaded directly from file, + or an interpolated map based on the polynomial or scalar value + mapped onto the image shape. + + This is a best-estimate of the sky noise, ignoring as best as + possible the sources in the sky, and looking only at the smoothed + background variability. + """ + if self._var_data is None and self.filepath is not None: + self.load() + return self._var_data + + @variance.setter + def variance(self, value): + self._var_data = value + + @property + def rms(self): + if self.variance is None: + return None + return np.sqrt(self.variance) + + @rms.setter + def rms(self, value): + self.variance = value ** 2 + + def _get_inverse_badness(self): + """Get a dict with the allowed values of badness that can be assigned to this object""" + return bg_badness_inverse + + def __init__( self, *args, **kwargs ): + FileOnDiskMixin.__init__( self, **kwargs ) + HasBitFlagBadness.__init__(self) + SeeChangeBase.__init__( self ) + self._image_shape = None + self._counts_data = None + self._var_data = None + + # Manually set all properties ( columns or not ) + for key, value in kwargs.items(): + if hasattr( self, key ): + setattr( self, key, value ) + + @sa.orm.reconstructor + def init_on_load( self ): + Base.init_on_load( self ) + FileOnDiskMixin.init_on_load( self ) + self._image_shape = None + self._counts_data = None + self._var_data = None + + def __setattr__(self, key, value): + if key == 'image': + if value is not None and not isinstance(value, Image): + raise ValueError(f'Background.image must be an Image object. Got {type(value)} instead. ') + self._image_shape = value.data.shape + + super().__setattr__(key, value) + + def save( self, filename=None, **kwargs ): + """Write the Background to disk. + + May or may not upload to the archive and update the + FileOnDiskMixin-included fields of this object based on the + additional arguments that are forwarded to FileOnDiskMixin.save. + + This saves an HDF5 file that contains a single group called "/background". + It will have a few attributes, notably: "format", "value", "noise" and "image_shape". + + If the format is "map", there are two datasets under this group: "background/counts" and "background/variance". + Counts represents the background counts at each location in the image, while the variance represents the noise + variability that comes from the sky, ignoring the sources (as much as possible). + + If the format is "polynomial", there are three datasets: + "background/coeffs" and "background/x_degree" and "background/y_degree". + These will include the coefficients of the polynomial, and the degree of the polynomial in x and y as such: + Constant term, x term, y term, x^2 term, xy term, y^2 term, x^3 term, x^2y term, xy^2 term, y^3 term, etc. + Which corresponds to a list of degrees: + x_degree: [0, 1, 0, 2, 1, 0, 3, 2, 1, 0, ...] + y_degree: [0, 0, 1, 0, 1, 2, 0, 1, 2, 3, ...] + + Finally, if the format is "scalar", there would not be any datasets. + + Parameters + ---------- + filename: str or path + The path to the file to write, relative to the local store + root. Do not include the extension (e.g. '.h5') at the + end of the name; that will be added automatically for all + extensions. 
If None, will call image.invent_filepath() to get a + filestore-standard filename and directory. + + Additional arguments are passed on to FileOnDiskMixin.save() + + """ + if self.format not in ['scalar', 'map', 'polynomial']: + raise ValueError(f'Unknown background format "{self.format}".') + + if self.value is None or self.noise is None: + raise RuntimeError( "Both value and noise must be non-None" ) + + if self.format == 'map' and (self.counts is None or self.variance is None): + raise RuntimeError( "Both counts and variance must be non-None" ) + + # TODO: add some checks for the polynomial format + + if filename is not None: + if not filename.endswith('.h5'): + filename += '.h5' + self.filepath = filename + else: + if self.image.filepath is not None: + self.filepath = self.image.filepath + else: + self.filepath = self.image.invent_filepath() + + if self.provenance is None: + raise RuntimeError("Can't invent a filepath for the Background without a provenance") + self.filepath += f'.bg_{self.provenance.id[:6]}.h5' + + h5path = os.path.join( self.local_path, f'{self.filepath}') + + with h5py.File(h5path, 'w') as h5f: + bggrp = h5f.create_group('background') + bggrp.attrs['format'] = self.format + bggrp.attrs['method'] = self.method + bggrp.attrs['value'] = self.value + bggrp.attrs['noise'] = self.noise + bggrp.attrs['image_shape'] = self.image_shape + + if self.format == 'map': + if self.counts is None or self.variance is None: + raise RuntimeError("Both counts and variance must be non-None") + if self.counts.shape != self.image_shape: + raise RuntimeError( + f"Counts shape {self.counts.shape} does not match image shape {self.image_shape}" + ) + if self.variance.shape != self.image_shape: + raise RuntimeError( + f"Variance shape {self.variance.shape} does not match image shape {self.image_shape}" + ) + + bggrp.create_dataset( 'counts', data=self.counts ) + bggrp.create_dataset( 'variance', data=self.variance ) + elif self.format == 'polynomial': + raise NotImplementedError('Currently we do not support a polynomial background model. ') + bggrp.create_dataset( 'coeffs', data=self.counts ) + bggrp.create_dataset( 'x_degree', data=self.x_degree ) + bggrp.create_dataset( 'y_degree', data=self.y_degree ) + elif self.format == 'scalar': + pass # no datasets to create + else: + raise ValueError( f'Unknown background format "{self.format}".' ) + + # Save the file to the archive and update the database record + # (From what we did above, the files are already in the right place in the local filestore.) + FileOnDiskMixin.save( self, h5path, extension=None, **kwargs ) + + def load(self, download=True, always_verify_md5=False, filepath=None): + """Load the data from the files into the _counts_data, _var_data and _image_shape fields. + + Parameters + ---------- + download : Bool, default True + If True, download the files from the archive if they're not + found in local storage. Ignored if filepath is not None. + + always_verify_md5 : Bool, default False + If the file is found locally, verify the md5 of the file; if + it doesn't match, re-get the file from the archive. Ignored + if filepath is not None. + + """ + if filepath is None: + filepath = self.get_fullpath(download=download, always_verify_md5=always_verify_md5) + + with h5py.File(filepath, 'r') as h5f: + if 'background' not in h5f: + raise ValueError('No background group found in the file. 
') + loaded_format = h5f['background'].attrs['format'] + + if self.format != loaded_format: + raise ValueError( + f'Loaded background format "{loaded_format}" does not match the expected format "{self.format}".' + ) + + self.value = float(h5f['background'].attrs['value']) + self.noise = float(h5f['background'].attrs['noise']) + self.image_shape = tuple(h5f['background'].attrs['image_shape']) + + if loaded_format == 'map': + self._counts_data = h5f['background/counts'][:] + self._var_data = h5f['background/variance'][:] + elif loaded_format == 'polynomial': + raise NotImplementedError('Currently we do not support a polynomial background model. ') + self._counts_data = h5f['background/coeffs'][:] + self._x_degree = h5f['background/x_degree'][:] + self._y_degree = h5f['background/y_degree'][:] + elif loaded_format == 'scalar': + pass + else: + raise ValueError( f'Unknown background format "{loaded_format}".' ) + + def free( self ): + """Free loaded world coordinates memory. + + Wipe out the _counts_data and _var_data fields, freeing memory. + Depends on python garbage collection, so if there are other + references to those objects, the memory won't actually be freed. + + """ + self._counts_data = None + self._var_data = None + + def get_upstreams(self, session=None): + """Get the image that was used to make this Background object. """ + with SmartSession(session) as session: + return session.scalars(sa.select(Image).where(Image.id == self.image_id)).all() + + def get_downstreams(self, session=None, siblings=False): + """Get the downstreams of this Background object. + + If siblings=True then also include the SourceList, PSF, WCS, and ZP + that were created at the same time as this PSF. + """ + from models.source_list import SourceList + from models.psf import PSF + from models.world_coordinates import WorldCoordinates + from models.zero_point import ZeroPoint + from models.provenance import Provenance + + with SmartSession(session) as session: + subs = session.scalars( + sa.select(Image).where( + Image.provenance.has(Provenance.upstreams.any(Provenance.id == self.provenance.id)), + Image.upstream_images.any(Image.id == self.image_id), + ) + ).all() + output = subs + + if siblings: + # There should be exactly one source list, wcs, and zp per PSF, with the same provenance + # as they are created at the same time. 
+ sources = session.scalars( + sa.select(SourceList).where( + SourceList.image_id == self.image_id, SourceList.provenance_id == self.provenance_id + ) + ).all() + if len(sources) != 1: + raise ValueError( + f"Expected exactly one source list for Background {self.id}, but found {len(sources)}" + ) + + output.append(sources[0]) + + psfs = session.scalars( + sa.select(PSF).where(PSF.image_id == self.image_id, PSF.provenance_id == self.provenance_id) + ).all() + if len(psfs) != 1: + raise ValueError(f"Expected exactly one PSF for Background {self.id}, but found {len(psfs)}") + + output.append(psfs[0]) + + wcs = session.scalars( + sa.select(WorldCoordinates).where(WorldCoordinates.sources_id == sources.id) + ).all() + if len(wcs) != 1: + raise ValueError(f"Expected exactly one wcs for Background {self.id}, but found {len(wcs)}") + + output.append(wcs[0]) + + zp = session.scalars(sa.select(ZeroPoint).where(ZeroPoint.sources_id == sources.id)).all() + + if len(zp) != 1: + raise ValueError(f"Expected exactly one zp for Background {self.id}, but found {len(zp)}") + + output.append(zp[0]) + + return output diff --git a/models/base.py b/models/base.py index f1a96801..f7cc1d70 100644 --- a/models/base.py +++ b/models/base.py @@ -1697,6 +1697,10 @@ def append_badness(self, value): doc='Free text comment about this data product, e.g., why it is bad. ' ) + def __init__(self): + self._bitflag = 0 + self._upstream_bitflag = 0 + def update_downstream_badness(self, session=None, commit=True, siblings=True): """Send a recursive command to update all downstream objects that have bitflags. diff --git a/models/cutouts.py b/models/cutouts.py index b35e153e..a4df997c 100644 --- a/models/cutouts.py +++ b/models/cutouts.py @@ -126,6 +126,7 @@ def ref_image(self): def __init__(self, *args, **kwargs): FileOnDiskMixin.__init__(self, *args, **kwargs) + HasBitFlagBadness.__init__(self) SeeChangeBase.__init__(self) # don't pass kwargs as they could contain non-column key-values self.format = 'hdf5' # the default should match the column-defined default above! 
@@ -291,7 +292,6 @@ def invent_filepath(self): filename = os.path.splitext(filename)[0] filename += '.cutouts_' - self.provenance.update_id() filename += self.provenance.id[:6] if self.format == 'hdf5': filename += '.h5' diff --git a/models/enums_and_bitflags.py b/models/enums_and_bitflags.py index 85f62131..b3673d57 100644 --- a/models/enums_and_bitflags.py +++ b/models/enums_and_bitflags.py @@ -239,6 +239,27 @@ class PSFFormatConverter( EnumConverter ): _dict_inverse = None +class BackgroundFormatConverter( EnumConverter ): + _dict = { + 0: 'scalar', + 1: 'map', + 2: 'polynomial', + } + _allowed_values = None + _dict_filtered = None + _dict_inverse = None + + +class BackgroundMethodConverter( EnumConverter ): + _dict = { + 0: 'zero', + 1: 'sep', + } + _allowed_values = None + _dict_filtered = None + _dict_inverse = None + + def bitflag_to_string(value, dictionary): """ @@ -364,6 +385,12 @@ def string_to_bitflag(value, dictionary): source_list_badness_inverse = {EnumConverter.c(v): k for k, v in source_list_badness_dict.items()} +# these are the ways a Background object is allowed to be bad +background_badness_dict = { + +} + + # these are the ways a WorldCoordinates/ZeroPoint object is allowed to be bad # mostly due to bad matches to the catalog catalog_match_badness_dict = { @@ -374,6 +401,15 @@ def string_to_bitflag(value, dictionary): catalog_match_badness_inverse = {EnumConverter.c(v): k for k, v in catalog_match_badness_dict.items()} +# TODO: need to consider what kinds of bad backgrounds we really might have +# TODO: make sure we are not repeating the same keywords in other badness dictionaries +bg_badness_dict = { + 31: 'too dense', + 32: 'bad fit', +} +bg_badness_inverse = {EnumConverter.c(v): k for k, v in bg_badness_dict.items()} + + # these are the ways a Cutouts object is allowed to be bad cutouts_badness_dict = { 41: 'cosmic ray', @@ -391,6 +427,10 @@ def string_to_bitflag(value, dictionary): data_badness_dict.update(image_badness_dict) data_badness_dict.update(cutouts_badness_dict) data_badness_dict.update(source_list_badness_dict) +data_badness_dict.update(psf_badness_dict) +data_badness_dict.update(bg_badness_dict) +data_badness_dict.update(catalog_match_badness_dict) +data_badness_dict.update(bg_badness_dict) data_badness_inverse = {EnumConverter.c(v): k for k, v in data_badness_dict.items()} if 0 in data_badness_inverse: raise ValueError('Cannot have a badness bitflag of zero. 
This is reserved for good data.') @@ -402,6 +442,20 @@ class BadnessConverter( EnumConverter ): _dict_filtered = None _dict_inverse = None + +# bitflag for image preprocessing steps that have been done +image_preprocessing_dict = { + 0: 'overscan', + 1: 'zero', + 2: 'dark', + 3: 'linearity', + 4: 'flat', + 5: 'fringe', + 6: 'illumination' +} +image_preprocessing_inverse = {EnumConverter.c(v):k for k, v in image_preprocessing_dict.items()} + + # bitflag used in flag images flag_image_bits = { 0: 'bad pixel', # Bad pixel flagged by the instrument @@ -422,7 +476,7 @@ class BitFlagConverter( EnumConverter ): # the list of possible processing steps from a section of an exposure up to measurements, r/b scores, and report process_steps_dict = { 1: 'preprocessing', # creates an Image from a section of the Exposure - 2: 'extraction', # creates a SourceList from an Image, and a PSF + 2: 'extraction', # creates a SourceList, PSF, Background, WorldCoordinates, and ZeroPoint from an Image 5: 'subtraction', # creates a subtraction Image 6: 'detection', # creates a SourceList from a subtraction Image 7: 'cutting', # creates Cutouts from a subtraction Image @@ -437,7 +491,7 @@ class BitFlagConverter( EnumConverter ): 1: 'image', 2: 'sources', 3: 'psf', - # 4: 'background', # not yet implemented + 4: 'bg', 5: 'wcs', 6: 'zp', 7: 'sub_image', diff --git a/models/exposure.py b/models/exposure.py index 052d3269..fa4d2fe3 100644 --- a/models/exposure.py +++ b/models/exposure.py @@ -340,6 +340,7 @@ def __init__(self, current_file=None, invent_filepath=True, **kwargs): """ FileOnDiskMixin.__init__(self, **kwargs) + HasBitFlagBadness.__init__(self) SeeChangeBase.__init__(self) # don't pass kwargs as they could contain non-column key-values self._data = None # the underlying image data for each section diff --git a/models/image.py b/models/image.py index d1efd3f1..515c0e29 100644 --- a/models/image.py +++ b/models/image.py @@ -113,6 +113,7 @@ def format(self, value): cascade='save-update, merge, refresh-expire, expunge', passive_deletes=True, lazy='selectin', + join_depth=1, # this enables the eager load of one generation of upstreams order_by='images.c.mjd', # in chronological order of exposure start time doc='Images used to produce a multi-image object, like a coadd or a subtraction. ' ) @@ -460,7 +461,6 @@ def _get_inverse_badness(self): 'data', 'flags', 'weight', - 'background', # TODO: remove this when adding the Background object (issue #186) 'score', # the matched-filter score of the image (e.g., from ZOGY) 'psfflux', # the PSF-fitted equivalent flux of the image (e.g., from ZOGY) 'psffluxerr', # the error in the PSF-fitted equivalent flux of the image (e.g., from ZOGY) @@ -468,6 +468,7 @@ def _get_inverse_badness(self): def __init__(self, *args, **kwargs): FileOnDiskMixin.__init__(self, *args, **kwargs) + HasBitFlagBadness.__init__(self) SeeChangeBase.__init__(self) # don't pass kwargs as they could contain non-column key-values self.raw_data = None # the raw exposure pixels (2D float or uint16 or whatever) not saved to disk! 
@@ -477,7 +478,6 @@ def __init__(self, *args, **kwargs): self._data = None # the underlying pixel data array (2D float array) self._flags = None # the bit-flag array (2D int array) self._weight = None # the inverse-variance array (2D float array) - self._background = None # an estimate for the background flux (2D float array) self._score = None # the image after filtering with the PSF and normalizing to S/N units (2D float array) self._psfflux = None # the PSF-fitted equivalent flux of the image (2D float array) self._psffluxerr = None # the error in the PSF-fitted equivalent flux of the image (2D float array) @@ -485,6 +485,7 @@ def __init__(self, *args, **kwargs): # additional data products that could go with the Image self.sources = None # the sources extracted from this Image (optionally loaded) self.psf = None # the point-spread-function object (optionally loaded) + self.bg = None # the background object (optionally loaded) self.wcs = None # the WorldCoordinates object (optionally loaded) self.zp = None # the zero-point object (optionally loaded) @@ -526,6 +527,7 @@ def init_on_load(self): self.sources = None self.psf = None + self.bg = None self.wcs = None self.zp = None @@ -594,16 +596,14 @@ def merge_all(self, session): self.psf.image_id = new_image.id self.psf.provenance_id = self.psf.provenance.id if self.psf.provenance is not None else None new_image.psf = self.psf.safe_merge(session=session) - if new_image.psf._bitflag is None: # I don't know why this isn't set to 0 using the default - new_image.psf._bitflag = 0 - if new_image.psf._upstream_bitflag is None: # I don't know why this isn't set to 0 using the default - new_image.psf._upstream_bitflag = 0 + + if self.bg is not None: + self.bg.image = new_image + self.bg.image_id = new_image.id + self.bg.provenance_id = self.bg.provenance.id if self.bg.provenance is not None else None + new_image.bg = self.bg.safe_merge(session=session) # take care of the upstream images and their products - # if sa.inspect(self).detached: # self can't load the images, but new_image has them - # upstream_list = new_image.upstream_images - # else: - # upstream_list = self.upstream_images # can use the original images, before merging into new_image try: upstream_list = self.upstream_images # can use the original images, before merging into new_image except DetachedInstanceError as e: @@ -1095,7 +1095,7 @@ def _make_aligned_images(self): # verify all products are loaded for im in self.upstream_images: - if im.sources is None or im.wcs is None or im.zp is None: + if im.sources is None or im.bg is None or im.wcs is None or im.zp is None: raise RuntimeError('Some images are missing data products. Try running load_upstream_products().') aligned = [] @@ -1529,8 +1529,8 @@ def free( self, free_derived_products=True, free_aligned=True, only_free=None ): Parameters ---------- free_derived_products: bool, default True - If True, will also call free on self.sources, self.psf, and - self.wcs + If True, will also call free on self.sources, self.psf, + self.bg and self.wcs. free_aligned: bool, default True Will call free() on each of the aligned images referenced @@ -1560,11 +1560,11 @@ def free( self, free_derived_products=True, free_aligned=True, only_free=None ): self.sources.free() if self.psf is not None: self.psf.free() - # This implementation in WCS should be done after PR167 is done. 
- # Not a big deal if it's not done, because WCSes will not use - # very much memory - # if self.wcs is not None: - # self.wcs.free() + if self.bg is not None: + self.bg.free() + if self.wcs is not None: + self.wcs.free() + if free_aligned: if self._aligned_images is not None: for alim in self._aligned_images: @@ -1678,6 +1678,9 @@ def load_upstream_products(self, session=None): if im.psf is None or im.psf.provenance_id not in prov_ids: need_to_load = True break + if im.bg is None or im.bg.provenance_id not in prov_ids: + need_to_load = True + break if im.wcs is None or im.wcs.provenance_id not in prov_ids: need_to_load = True break @@ -1690,6 +1693,7 @@ def load_upstream_products(self, session=None): from models.source_list import SourceList from models.psf import PSF + from models.background import Background from models.world_coordinates import WorldCoordinates from models.zero_point import ZeroPoint @@ -1717,6 +1721,13 @@ def load_upstream_products(self, session=None): ) ).all() + bg_results = session.scalars( + sa.select(Background).where( + Background.image_id.in_(im_ids), + Background.provenance_id.in_(prov_ids), + ) + ).all() + wcs_results = session.scalars( sa.select(WorldCoordinates).where( WorldCoordinates.sources_id.in_(sources_ids), @@ -1748,6 +1759,14 @@ def load_upstream_products(self, session=None): elif len(psfs) == 1: im.psf = psfs[0] + bgs = [b for b in bg_results if b.image_id == im.id] # only get the bgs for this image + if len(bgs) > 1: + raise ValueError( + f"Image {im.id} has more than one Background matching upstream provenance." + ) + elif len(bgs) == 1: + im.bg = bgs[0] + if im.sources is not None: wcses = [w for w in wcs_results if w.sources_id == im.sources.id] # the wcses for this image if len(wcses) > 1: @@ -1804,6 +1823,8 @@ def get_upstreams(self, session=None): upstreams.append(im.sources) if im.psf is not None: upstreams.append(im.psf) + if im.bg is not None: + upstreams.append(im.bg) if im.wcs is not None: upstreams.append(im.wcs) if im.zp is not None: @@ -1816,17 +1837,12 @@ def get_downstreams(self, session=None, siblings=False): # avoids circular import from models.source_list import SourceList from models.psf import PSF + from models.background import Background from models.world_coordinates import WorldCoordinates from models.zero_point import ZeroPoint downstreams = [] with SmartSession(session) as session: - # get all psfs that are related to this image (regardless of provenance) - psfs = session.scalars(sa.select(PSF).where(PSF.image_id == self.id)).all() - downstreams += psfs - if self.psf is not None and self.psf not in psfs: # if not in the session, could be duplicate! - downstreams.append(self.psf) - # get all source lists that are related to this image (regardless of provenance) sources = session.scalars( sa.select(SourceList).where(SourceList.image_id == self.id) @@ -1835,6 +1851,17 @@ def get_downstreams(self, session=None, siblings=False): if self.sources is not None and self.sources not in sources: # if not in the session, could be duplicate! downstreams.append(self.sources) + # get all psfs that are related to this image (regardless of provenance) + psfs = session.scalars(sa.select(PSF).where(PSF.image_id == self.id)).all() + downstreams += psfs + if self.psf is not None and self.psf not in psfs: # if not in the session, could be duplicate! 
+ downstreams.append(self.psf) + + bgs = session.scalars(sa.select(Background).where(Background.image_id == self.id)).all() + downstreams += bgs + if self.bg is not None and self.bg not in bgs: # if not in the session, could be duplicate! + downstreams.append(self.bg) + wcses = [] zps = [] for s in sources: @@ -1928,17 +1955,6 @@ def weight(self): def weight(self, value): self._weight = value - @property - def background(self): - """An estimate for the background flux (2D float array). """ - if self._data is None and self.filepath is not None: - self.load() - return self._background - - @background.setter - def background(self, value): - self._background = value - @property def score(self): """The image after filtering with the PSF and normalizing to S/N units (2D float array). """ @@ -1999,6 +2015,26 @@ def nanscore(self): def nanscore(self, value): self._nanscore = value + @property + def data_bgsub(self): + """The image data, after subtracting the background. If no Background object is loaded, will raise. """ + if self.bg is None: + raise ValueError("No background is loaded for this image.") + if self.bg.format == 'scalar': + return self.data - self.bg.value + else: + return self.data - self.bg.counts + + @property + def nandata_bgsub(self): + """The image data, after subtracting the background and masking with NaNs wherever the flag is not zero. """ + if self.bg is None: + raise ValueError("No background is loaded for this image.") + if self.bg.format == 'scalar': + return self.nandata - self.bg.value + else: + return self.nandata - self.bg.counts + def show(self, **kwargs): """ Display the image using the matplotlib imshow function. diff --git a/models/instrument.py b/models/instrument.py index dea62d94..a8afb278 100644 --- a/models/instrument.py +++ b/models/instrument.py @@ -1172,6 +1172,7 @@ def standard_apertures( cls ): list of float """ + return RuntimeError('We should no longer depend on instruments to give the standard apertures') return [ 0.6732, 1., 2., 3., 4., 5., 7., 10. ] @classmethod @@ -1200,6 +1201,7 @@ def fiducial_aperture( cls ): # though, for diagnostic purposes. # Note that this 5 is an index, not the value... it's coincidence that the index number is 5. + return RuntimeError('We should no longer depend on instruments to give the fiducial aperture') return 5 # Gaia specific methods diff --git a/models/measurements.py b/models/measurements.py index 547660f6..6b42fda8 100644 --- a/models/measurements.py +++ b/models/measurements.py @@ -117,18 +117,19 @@ class Measurements(Base, AutoIDMixin, SpatiallyIndexed, HasBitFlagBadness): def flux(self): """The background subtracted aperture flux in the "best" aperture. """ if self.best_aperture == -1: - return self.flux_psf - self.background * self.area_psf + return self.flux_psf - self.bkg_mean * self.area_psf else: - return self.flux_apertures[self.best_aperture] - self.background * self.area_apertures[self.best_aperture] + return self.flux_apertures[self.best_aperture] - self.bkg_mean * self.area_apertures[self.best_aperture] @property def flux_err(self): """The error on the background subtracted aperture flux in the "best" aperture. 
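The background term added to the variance is bkg_std ** 2 / bkg_pix times the aperture area; bkg_std / sqrt(bkg_pix) is the estimated error on the background mean (see the bkg_pix column).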
""" + # we divide by the number of pixels of the background as that is how well we can estimate the b/g mean if self.best_aperture == -1: - return np.sqrt(self.flux_psf_err ** 2 + self.background_err ** 2 * self.area_psf) + return np.sqrt(self.flux_psf_err ** 2 + self.bkg_std ** 2 / self.bkg_pix * self.area_psf) else: err = self.flux_apertures_err[self.best_aperture] - err += self.background_err ** 2 * self.area_apertures[self.best_aperture] + err += self.bkg_std ** 2 / self.bkg_pix * self.area_apertures[self.best_aperture] return np.sqrt(err) @property @@ -165,15 +166,15 @@ def mag_apertures_err(self): @property def magnitude(self): + mag = -2.5 * np.log10(self.flux) + self.zp.zp if self.best_aperture == -1: - return self.mag_psf - return self.mag_apertures[self.best_aperture] + return mag + else: + return mag + self.zp.aper_cors[self.best_aperture] @property def magnitude_err(self): - if self.best_aperture == -1: - return self.mag_psf_err - return self.mag_apertures_err[self.best_aperture] + return np.sqrt((2.5 / np.log(10) * self.flux_err / self.flux) ** 2 + self.zp.dzp ** 2) @property def lim_mag(self): @@ -213,18 +214,25 @@ def instrument_object(self): return None return self.cutouts.sources.image.instrument_object - background = sa.Column( + bkg_mean = sa.Column( sa.REAL, nullable=False, doc="Background of the measurement, from a local annulus. Given as counts per pixel. " ) - background_err = sa.Column( + bkg_std = sa.Column( sa.REAL, nullable=False, doc="RMS error of the background measurement, from a local annulus. Given as counts per pixel. " ) + bkg_pix = sa.Column( + sa.REAL, + nullable=False, + doc="Annulus area (in pixels) used to calculate the mean/std of the background. " + "An estimate of the error on the mean would be bkg_std / sqrt(bkg_pix)." + ) + area_psf = sa.Column( sa.REAL, nullable=False, @@ -291,6 +299,7 @@ def instrument_object(self): def __init__(self, **kwargs): SeeChangeBase.__init__(self) # don't pass kwargs as they could contain non-column key-values + HasBitFlagBadness.__init__(self) self._cutouts_list_index = None # helper (transient) attribute that helps find the right cutouts in a list # manually set all properties (columns or not) @@ -474,7 +483,7 @@ def get_flux_at_point(self, ra, dec, aperture=None): mask[start_y:end_y, start_x:end_x] = psf_clip[start_y + dy:end_y + dy, start_x + dx:end_x + dx] mask[np.isnan(im)] = 0 # exclude bad pixels from the mask flux = np.nansum(im * mask) / np.nansum(mask ** 2) - fluxerr = self.background_err / np.sqrt(np.nansum(mask ** 2)) + fluxerr = self.bkg_std / np.sqrt(np.nansum(mask ** 2)) area = np.nansum(mask) / (np.nansum(mask ** 2)) else: radius = self.aper_radii[aperture] @@ -482,7 +491,7 @@ def get_flux_at_point(self, ra, dec, aperture=None): mask = get_circle(radius=radius, imsize=im.shape[0], soft=True).get_image(offset_x, offset_y) # for aperture photometry we don't normalize, just assume the PSF is in the aperture flux = np.nansum(im * mask) - fluxerr = self.background_err * np.sqrt(np.nansum(mask ** 2)) + fluxerr = self.bkg_std * np.sqrt(np.nansum(mask ** 2)) area = np.nansum(mask) return flux, fluxerr, area diff --git a/models/provenance.py b/models/provenance.py index 2b9ced8a..75ce1d8f 100644 --- a/models/provenance.py +++ b/models/provenance.py @@ -125,6 +125,7 @@ class Provenance(Base): passive_deletes=True, cascade="save-update, merge, expunge, refresh-expire", lazy='selectin', # should be able to get upstream_hashes without a session! 
+ join_depth=3, # how many generations up the upstream chain to load ) downstreams = relationship( diff --git a/models/psf.py b/models/psf.py index e8272a1c..36dde2fe 100644 --- a/models/psf.py +++ b/models/psf.py @@ -66,6 +66,7 @@ def format( self, value ): 'Image', cascade='save-update, merge, refresh-expire, expunge', passive_deletes=True, + lazy='selectin', doc="Image for which this is the PSF." ) @@ -167,6 +168,7 @@ def _get_inverse_badness(self): def __init__( self, *args, **kwargs ): FileOnDiskMixin.__init__( self, **kwargs ) + HasBitFlagBadness.__init__(self) SeeChangeBase.__init__( self ) self._header = None self._data = None @@ -274,7 +276,7 @@ def load( self, download=True, always_verify_md5=False, psfpath=None, psfxmlpath psfpath : str or Path, default None If None, files will be read using the get_fullpath() method to get the right files form the local store and/or archive - given the databse fields. If not None, read _header and + given the database fields. If not None, read _header and _data from this file. (This exists so that this method may be used to load the data with a psf that's not yet in the database, without having to play games with the filepath @@ -305,7 +307,7 @@ def load( self, download=True, always_verify_md5=False, psfpath=None, psfxmlpath self._info = ifp.read() def free( self ): - """Free loaded world coordinates memory. + """Free loaded PSF memory. Wipe out the data, info, and header fields, freeing memory. Depends on python garbage collection, so if there are other @@ -522,17 +524,18 @@ def add_psf_to_image( self, image, x, y, flux, norm=True, noisy=False, weight=No ) def get_upstreams(self, session=None): - """Get the image that was used to make this source list. """ + """Get the image that was used to make this PSF. """ with SmartSession(session) as session: return session.scalars(sa.select(Image).where(Image.id == self.image_id)).all() def get_downstreams(self, session=None, siblings=False): """Get the downstreams of this PSF. - If siblings=True then also include the SourceLists, WCSes, ZPs and background objects + If siblings=True then also include the SourceList, WCS, ZP and background object that were created at the same time as this PSF. 
""" from models.source_list import SourceList + from models.background import Background from models.world_coordinates import WorldCoordinates from models.zero_point import ZeroPoint from models.provenance import Provenance @@ -559,7 +562,16 @@ def get_downstreams(self, session=None, siblings=False): output.append(sources[0]) - # TODO: add background object + bgs = session.scalars( + sa.select(Background).where( + Background.image_id == self.image_id, + Background.provenance_id == self.provenance_id + ) + ).all() + if len(bgs) != 1: + raise ValueError(f"Expected exactly one Background for SourceList {self.id}, but found {len(bgs)}") + + output.append(bgs[0]) wcs = session.scalars( sa.select(WorldCoordinates).where(WorldCoordinates.sources_id == sources.id) diff --git a/models/reference.py b/models/reference.py index 780411c2..e35bf6ee 100644 --- a/models/reference.py +++ b/models/reference.py @@ -6,6 +6,7 @@ from models.provenance import Provenance from models.source_list import SourceList from models.psf import PSF +from models.background import Background from models.world_coordinates import WorldCoordinates from models.zero_point import ZeroPoint @@ -138,6 +139,7 @@ class Reference(Base, AutoIDMixin): def __init__(self, **kwargs): self.sources = None self.psf = None + self.bg = None self.wcs = None self.zp = None super().__init__(**kwargs) @@ -150,6 +152,7 @@ def __setattr__(self, key, value): self.section_id = value.section_id self.sources = value.sources self.psf = value.psf + self.bg = value.bg self.wcs = value.wcs self.zp = value.zp @@ -160,6 +163,7 @@ def init_on_load(self): Base.init_on_load(self) self.sources = None self.psf = None + self.bg = None self.wcs = None self.zp = None this_object_session = orm.Session.object_session(self) @@ -169,7 +173,7 @@ def init_on_load(self): def make_provenance(self): """Make a provenance for this reference image. """ upstreams = [self.image.provenance] - for att in ['image', 'sources', 'psf', 'wcs', 'zp']: + for att in ['image', 'sources', 'psf', 'bg', 'wcs', 'zp']: if getattr(self, att) is not None: upstreams.append(getattr(self, att).provenance) else: @@ -202,6 +206,8 @@ def get_upstream_provenances(self): prov.append(self.sources.provenance) if self.psf is not None and self.psf.provenance is not None and self.psf.provenance.id is not None: prov.append(self.psf.provenance) + if self.bg is not None and self.bg.provenance is not None and self.bg.provenance.id is not None: + prov.append(self.bg.provenance) if self.wcs is not None and self.wcs.provenance is not None and self.wcs.provenance.id is not None: prov.append(self.wcs.provenance) if self.zp is not None and self.zp.provenance is not None and self.zp.provenance.id is not None: @@ -245,6 +251,20 @@ def load_upstream_products(self, session=None): self.image.psf = psfs[0] self.psf = psfs[0] + bgs = session.scalars( + sa.select(Background).where( + Background.image_id == self.image.id, + Background.provenance_id.in_(prov_ids), + ) + ).all() + if len(bgs) > 1: + raise ValueError( + f"Image {self.image_id} has more than one Background matching upstream provenance." 
+ ) + elif len(bgs) == 1: + self.image.bg = bgs[0] + self.bg = bgs[0] + if self.sources is not None: wcses = session.scalars( sa.select(WorldCoordinates).where( diff --git a/models/report.py b/models/report.py index 6869b114..ead59d29 100644 --- a/models/report.py +++ b/models/report.py @@ -291,17 +291,32 @@ def __init__(self, **kwargs): def init_on_load(self): SeeChangeBase.init_on_load(self) - def scan_datastore(self, ds, process_step, session=None): + def scan_datastore(self, ds, process_step=None, session=None): """Go over all the data in a datastore and update the report accordingly. - Will commit the changes to the database. + Will commit the Report object to the database. If there are any exceptions pending on the datastore it will re-raise them. + + Parameters + ---------- + ds : DataStore + The datastore to scan for information. + process_step : str, optional + The name of the process step that was just completed. + This will be added to the progress bitflag. + If not given, will skip adding the progress flag, + but will also skip checking warnings and errors... + Use without a process_step just to update general + properties of the datastore like the runtime and memory usage, + or for updating the products_exist and products_committed bitflags + (e.g., after saving the datastore). + session : sqlalchemy.orm.Session, optional + The session to use for committing the changes to the database. + If not given, will open a session and close it at the end + of the function. """ # parse the error, if it exists, so we can get to other data products without raising exception = ds.read_exception() - # append the newest step to the progress bitflag - self.append_progress(process_step) - # check which objects exist on the datastore, and which have been committed for prod in pipeline_products_dict.values(): if getattr(ds, prod) is not None: @@ -313,18 +328,23 @@ def scan_datastore(self, ds, process_step, session=None): self.process_runtime = ds.runtimes # update with new dictionary self.process_memory = ds.memory_usages # update with new dictionary - # parse the warnings, if they exist - if isinstance(ds.warnings_list, list): - new_string = self.read_warnings(process_step, ds.warnings_list) - if self.warnings is None or self.warnings == '': - self.warnings = new_string - else: - self.warnings += '\n***|***|***\n' + new_string - - if exception is not None: - self.error_type = exception.__class__.__name__ - self.error_message = str(exception) - self.error_step = process_step + if process_step is not None: + # append the newest step to the progress bitflag + if process_step in process_steps_inverse: # skip steps not in the dict + self.append_progress(process_step) + + # parse the warnings, if they exist + if isinstance(ds.warnings_list, list): + new_string = self.read_warnings(process_step, ds.warnings_list) + if self.warnings is None or self.warnings == '': + self.warnings = new_string + else: + self.warnings += '\n***|***|***\n' + new_string + + if exception is not None: + self.error_type = exception.__class__.__name__ + self.error_message = str(exception) + self.error_step = process_step with SmartSession(session) as session: new_report = self.commit_to_database(session=session) diff --git a/models/source_list.py b/models/source_list.py index 2daf1bbd..8bc19bbc 100644 --- a/models/source_list.py +++ b/models/source_list.py @@ -91,23 +91,21 @@ def format(self, value): doc="Radius of apertures used for aperture photometry in pixels." 
) - _inf_aper_num = sa.Column( + inf_aper_num = sa.Column( sa.SMALLINT, nullable=True, default=None, index=False, - doc="Which element of aper_rads to use as the 'infinite' aperture; null = last one" + doc="Which element of aper_rads to use as the 'infinite' aperture; -1 = last one. " ) - @property - def inf_aper_num( self ): - if self._inf_aper_num is None: - if self.aper_rads is None: - return None - else: - return len(self.aper_rads) - 1 - else: - return self._inf_aper_num + best_aper_num = sa.Column( + sa.SMALLINT, + nullable=True, + default=None, + index=False, + doc="Which element of aper_rads to use as the 'best' aperture; -1 = use PSF photometry. " + ) num_sources = sa.Column( sa.Integer, @@ -144,6 +142,7 @@ def _get_inverse_badness(self): def __init__(self, *args, **kwargs): FileOnDiskMixin.__init__(self, *args, **kwargs) + HasBitFlagBadness.__init__(self) SeeChangeBase.__init__(self) # don't pass kwargs as they could contain non-column key-values self._data = None @@ -409,7 +408,7 @@ def apfluxadu( self, apnum=0, ap=None ): ap: float, default None If not None, look for an aperture that's within 0.01 pixels of this and return flux in apertures of that radius. Raises - an exception if such an aperture doesn't apear in aper_rads + an exception if such an aperture doesn't appear in aper_rads Returns ------- @@ -420,7 +419,7 @@ def apfluxadu( self, apnum=0, ap=None ): raise NotImplementedError( f"Not currently implemented for format {self.format}" ) if ap is None: - if ( self.aper_rads is None ) or ( apnum < 0 ) or ( apnum >= len(self.aper_rads) ): + if ( self.aper_rads is None ) or ( apnum >= len(self.aper_rads) ): raise ValueError( f"Aperture radius number {apnum} doesn't exist." ) else: w = np.where( np.abs( np.array( self.aper_rads) - ap ) < 0.01 )[0] @@ -485,7 +484,7 @@ def calc_aper_cor( self, aper_num=0, inf_aper_num=None, min_stars=20 ): inf_aper_num = self.inf_aper_num if inf_aper_num is None: raise RuntimeError( f"Can't determine which aperture to use as the \"infinite\" aperture" ) - if ( inf_aper_num < 0 ) or ( inf_aper_num >= len(self.aper_rads) ): + if inf_aper_num >= len(self.aper_rads): raise ValueError( f"inf_aper_num {inf_aper_num} is outside available list of {len(self.aper_rads)}" ) bigflux, bigfluxerr = self.apfluxadu( apnum=inf_aper_num ) @@ -752,10 +751,11 @@ def get_upstreams(self, session=None): def get_downstreams(self, session=None, siblings=False): """Get all the data products that are made using this source list. - If siblings=True then also include the PSFs, WCSes, ZPs and background objects + If siblings=True then also include the PSF, Background, WCS, and ZP that were created at the same time as this SourceList. 
""" from models.psf import PSF + from models.background import Background from models.world_coordinates import WorldCoordinates from models.zero_point import ZeroPoint from models.cutouts import Cutouts @@ -780,7 +780,14 @@ def get_downstreams(self, session=None, siblings=False): if len(psfs) != 1: raise ValueError(f"Expected exactly one PSF for SourceList {self.id}, but found {len(psfs)}") - # TODO: add background object + bgs = session.scalars( + sa.select(Background).where( + Background.image_id == self.image_id, + Background.provenance_id == self.provenance_id + ) + ).all() + if len(bgs) != 1: + raise ValueError(f"Expected exactly one Background for SourceList {self.id}, but found {len(bgs)}") wcs = session.scalars(sa.select(WorldCoordinates).where(WorldCoordinates.sources_id == self.id)).all() if len(wcs) != 1: @@ -792,7 +799,8 @@ def get_downstreams(self, session=None, siblings=False): raise ValueError( f"Expected exactly one ZeroPoint for SourceList {self.id}, but found {len(zps)}" ) - output += psfs + wcs + zps + + output += psfs + bgs + wcs + zps return output diff --git a/models/world_coordinates.py b/models/world_coordinates.py index d4676115..7720a41e 100644 --- a/models/world_coordinates.py +++ b/models/world_coordinates.py @@ -75,6 +75,7 @@ def wcs( self, value ): def __init__(self, *args, **kwargs): FileOnDiskMixin.__init__( self, **kwargs ) + HasBitFlagBadness.__init__(self) SeeChangeBase.__init__( self ) self._wcs = None @@ -106,11 +107,12 @@ def get_upstreams(self, session=None): def get_downstreams(self, session=None, siblings=False): """Get the downstreams of this WorldCoordinates. - If siblings=True then also include the SourceLists, PSFs, ZPs and background objects + If siblings=True then also include the SourceList, PSF, background object and ZP that were created at the same time as this WorldCoordinates. """ from models.source_list import SourceList from models.psf import PSF + from models.background import Background from models.zero_point import ZeroPoint from models.provenance import Provenance @@ -143,7 +145,18 @@ def get_downstreams(self, session=None, siblings=False): output.append(psf[0]) - # TODO: add background object + bgs = session.scalars( + sa.select(Background).where( + Background.image_id == sources.image_id, Background.provenance_id == self.provenance_id + ) + ).all() + + if len(bgs) > 1: + raise ValueError( + f"Expected exactly one Background for WorldCoordinates {self.id}, but found {len(bgs)}." + ) + + output.append(bgs[0]) zp = session.scalars(sa.select(ZeroPoint).where(ZeroPoint.sources_id == sources.id)).all() @@ -223,4 +236,12 @@ def load( self, download=True, always_verify_md5=False, txtpath=None ): with open( txtpath ) as ifp: headertxt = ifp.read() self.wcs = WCS( fits.Header.fromstring( headertxt , sep='\\n' )) - + + def free(self): + """Free loaded world coordinates memory. + + Wipe out the _wcs text field, freeing a small amount of memory. + Depends on python garbage collection, so if there are other + references to those objects, the memory won't actually be freed. 
+ """ + self._wcs = None diff --git a/models/zero_point.py b/models/zero_point.py index 257a6dc8..0e8bdbcc 100644 --- a/models/zero_point.py +++ b/models/zero_point.py @@ -94,6 +94,7 @@ class ZeroPoint(Base, AutoIDMixin, HasBitFlagBadness): ) def __init__(self, *args, **kwargs): + HasBitFlagBadness.__init__(self) SeeChangeBase.__init__(self) # don't pass kwargs as they could contain non-column key-values # manually set all properties (columns or not) @@ -145,19 +146,19 @@ def get_upstreams(self, session=None): def get_downstreams(self, session=None, siblings=False): """Get the downstreams of this ZeroPoint. - If siblings=True then also include the SourceLists, PSFs, WCSes, and background objects + If siblings=True then also include the SourceList, PSF, background object and WCS that were created at the same time as this ZeroPoint. """ from models.source_list import SourceList from models.psf import PSF + from models.background import Background from models.world_coordinates import WorldCoordinates from models.provenance import Provenance with SmartSession(session) as session: subs = session.scalars( sa.select(Image).where( - Image.provenance.has(Provenance.upstreams.any(Provenance.id == self.provenance.id)), - Image.upstream_images.any(Image.id == self.sources.image_id), + Image.provenance.has(Provenance.upstreams.any(Provenance.id == self.provenance.id)) ) ).all() output = subs @@ -180,7 +181,18 @@ def get_downstreams(self, session=None, siblings=False): output.append(psf[0]) - # TODO: add background object + bgs = session.scalars( + sa.select(Background).where( + Background.image_id == sources.image_id, Background.provenance_id == self.provenance_id + ) + ).all() + + if len(bgs) > 1: + raise ValueError( + f"Expected exactly one Background for WorldCoordinates {self.id}, but found {len(bgs)}." + ) + + output.append(bgs[0]) wcs = session.scalars( sa.select(WorldCoordinates).where(WorldCoordinates.sources_id == sources.id) diff --git a/pipeline/backgrounding.py b/pipeline/backgrounding.py new file mode 100644 index 00000000..590dcef3 --- /dev/null +++ b/pipeline/backgrounding.py @@ -0,0 +1,169 @@ +import os +import time + +import numpy as np + +import sep + +from pipeline.parameters import Parameters +from pipeline.data_store import DataStore + +from models.background import Background + +from util.logger import SCLogger +from util.util import parse_bool + + +class ParsBackgrounder(Parameters): + def __init__(self, **kwargs): + super().__init__() + + self.format = self.add_par( + 'format', + 'map', + str, + 'Format of the background image. Choose: "map", "scalar", or "polynomial". ', + critical=True + ) + + self.method = self.add_par( + 'method', + 'sep', + str, + 'Method to use to estimate the background. Choose: "sep" or "zero". ', + critical=True + ) + + self.poly_order = self.add_par( + 'poly_order', + 1, + int, + 'Order of the polynomial to fit to the background. ', + critical=True + ) + + self.sep_box_size = self.add_par( + 'sep_box_size', + 128, + int, + 'Size of the box in pixels to use for the background estimation using sep. ', + critical=True + ) + + self.sep_filt_size = self.add_par( + 'sep_filt_size', + 3, + int, + 'Size of the filter to use for the background estimation using sep. 
', + critical=True + ) + + self._enforce_no_new_attrs = True + + self.override(kwargs) + + def get_process_name(self): + return 'backgrounding' + + +class Backgrounder: + def __init__(self, **kwargs): + self.pars = ParsBackgrounder(**kwargs) + + # this is useful for tests, where we can know if + # the object did any work or just loaded from DB or datastore + self.has_recalculated = False + + def run(self, *args, **kwargs): + """Calculate the background for the given image. + + Arguments are parsed by the DataStore.parse_args() method. + Returns a DataStore object with the products of the processing. + """ + self.has_recalculated = False + + try: # first make sure we get back a datastore, even an empty one + ds, session = DataStore.from_args(*args, **kwargs) + except Exception as e: + return DataStore.catch_failure_to_parse(e, *args) + + try: + t_start = time.perf_counter() + if parse_bool(os.getenv('SEECHANGE_TRACEMALLOC')): + import tracemalloc + tracemalloc.reset_peak() # start accounting for the peak memory usage from here + + self.pars.do_warning_exception_hangup_injection_here() + + # get the provenance for this step: + prov = ds.get_provenance('extraction', self.pars.get_critical_pars(), session=session) + + # try to find the background object in memory or in the database: + bg = ds.get_background(prov, session=session) + + if bg is None: # need to produce a background object + self.has_recalculated = True + image = ds.get_image(session=session) + + if self.pars.method == 'sep': + # Estimate the background mean and RMS with sep + boxsize = self.pars.sep_box_size + filtsize = self.pars.sep_filt_size + SCLogger.debug("Subtracting sky and estimating sky RMS") + # Dysfunctionality alert: sep requires a *float* image for the mask + # IEEE 32-bit floats have 23 bits in the mantissa, so they should + # be able to precisely represent a 16-bit integer mask image + # In any event, sep.Background uses >0 as "bad" + fmask = np.array(image._flags, dtype=np.float32) + sep_bg_obj = sep.Background(image.data.copy(), mask=fmask, + bw=boxsize, bh=boxsize, fw=filtsize, fh=filtsize) + fmask = None + bg = Background( + value=float(np.nanmedian(sep_bg_obj.back())), + noise=float(np.nanmedian(sep_bg_obj.rms())), + counts=sep_bg_obj.back(), + rms=sep_bg_obj.rms(), + format='map', + method='sep' + ) + elif self.pars.method == 'zero': # don't measure the b/g + bg = Background(value=0, noise=0, format='scalar', method='zero') + else: + raise ValueError(f'Unknown background method "{self.pars.method}"') + + bg.image_id = image.id + bg.image = image + + if bg.provenance is None: + bg.provenance = prov + else: + if bg.provenance.id != prov.id: + raise ValueError('Provenance mismatch for background and extraction provenance!') + + # since these are "first look estimates" we don't update them if they are already set + if ds.image.bkg_mean_estimate is None and ds.image.bkg_rms_estimate is None: + ds.image.bkg_mean_estimate = float( bg.value ) + ds.image.bkg_rms_estimate = float( bg.noise ) + + bg._upstream_bitflag = 0 + bg._upstream_bitflag |= ds.image.bitflag + + sources = ds.get_sources(session=session) + if sources is not None: + bg._upstream_bitflag |= sources.bitflag + + psf = ds.get_psf(session=session) + if psf is not None: + bg._upstream_bitflag |= psf.bitflag + + ds.bg = bg + + ds.runtimes['backgrounding'] = time.perf_counter() - t_start + if parse_bool(os.getenv('SEECHANGE_TRACEMALLOC')): + import tracemalloc + ds.memory_usages['backgrounding'] = tracemalloc.get_traced_memory()[1] / 1024 ** 2 # in MB + + 
except Exception as e: + ds.catch_exception(e) + finally: # make sure datastore is returned to be used in the next step + return ds diff --git a/pipeline/coaddition.py b/pipeline/coaddition.py index 4082a119..aec26ed7 100644 --- a/pipeline/coaddition.py +++ b/pipeline/coaddition.py @@ -1,4 +1,3 @@ - import numpy as np from numpy.fft import fft2, ifft2, fftshift @@ -15,6 +14,7 @@ from pipeline.parameters import Parameters from pipeline.data_store import DataStore from pipeline.detection import Detector +from pipeline.backgrounding import Backgrounder from pipeline.astro_cal import AstroCalibrator from pipeline.photo_cal import PhotCalibrator from util.util import get_latest_provenance, parse_session @@ -272,7 +272,7 @@ def _coadd_zogy( ---------- images: list of Image or list of 2D ndarrays Images that have been aligned to each other. - Each image must also have a PSF object attached. + Each image must also have a PSF and a background object attached. weights: list of 2D ndarrays The weights to use for each image. If images is given as Image objects, can be left as None. @@ -291,12 +291,10 @@ def _coadd_zogy( bkg_means: list of floats The mean background for each image. If images is given as Image objects, can be left as None. - This variable can be used to override the background estimation. If images are already background subtracted, set these to zeros. bkg_sigmas: list of floats The RMS of the background for each image. If images is given as Image objects, can be left as None. - This variable can be used to override the background estimation. Returns ------- @@ -348,12 +346,10 @@ def _coadd_zogy( # estimate the background if not given if bkg_means is None or bkg_sigmas is None: - bkg_means = [] - bkg_sigmas = [] - for array in data: - bkg, sigma = self._estimate_background(array) - bkg_means.append(bkg) - bkg_sigmas.append(sigma) + if not isinstance(images[0], Image): + raise ValueError('Background must be given if images are not Image objects. 
') + bkg_means = [im.bg.value for im in images] + bkg_sigmas = [im.bg.noise for im in images] imcube = np.array(data) flcube = np.array(flags) @@ -496,6 +492,13 @@ def __init__(self, **kwargs): self.pars.add_defaults_to_dict(extraction_config) self.extractor = Detector(**extraction_config) + # background estimation + backgrounder_config = self.config.value('extraction.bg', {}) + backgrounder_config.update(self.config.value('coaddition.extraction.bg', {})) # override coadd specific pars + backgrounder_config.update(kwargs.get('extraction', {}).get('bg', {})) + self.pars.add_defaults_to_dict(backgrounder_config) + self.backgrounder = Backgrounder(**backgrounder_config) + # astrometric fit using a first pass of sextractor and then astrometric fit to Gaia astrometor_config = self.config.value('extraction.wcs', {}) astrometor_config.update(self.config.value('coaddition.extraction.wcs', {})) # override coadd specific pars @@ -511,8 +514,14 @@ def __init__(self, **kwargs): self.photometor = PhotCalibrator(**photometor_config) # make sure when calling get_critical_pars() these objects will produce the full, nested dictionary - siblings = {'sources': self.extractor.pars, 'wcs': self.astrometor.pars, 'zp': self.photometor.pars} + siblings = { + 'sources': self.extractor.pars, + 'bg': self.backgrounder.pars, + 'wcs': self.astrometor.pars, + 'zp': self.photometor.pars + } self.extractor.pars.add_siblings(siblings) + self.backgrounder.pars.add_siblings(siblings) self.astrometor.pars.add_siblings(siblings) self.photometor.pars.add_siblings(siblings) @@ -640,6 +649,7 @@ def run(self, *args, **kwargs): # TODO: add the warnings/exception capturing, runtime/memory tracking (and Report making) as in top_level.py self.datastore = self.extractor.run(self.datastore) + self.datastore = self.backgrounder.run(self.datastore) self.datastore = self.astrometor.run(self.datastore) self.datastore = self.photometor.run(self.datastore) diff --git a/pipeline/cutting.py b/pipeline/cutting.py index a533abbc..704c28d1 100644 --- a/pipeline/cutting.py +++ b/pipeline/cutting.py @@ -84,7 +84,9 @@ def run(self, *args, **kwargs): detections = ds.get_detections(session=session) if detections is None: - raise ValueError(f'Cannot find a source list corresponding to the datastore inputs: {ds.get_inputs()}') + raise ValueError( + f'Cannot find a source list corresponding to the datastore inputs: {ds.get_inputs()}' + ) cutout_list = [] x = detections.x diff --git a/pipeline/data_store.py b/pipeline/data_store.py index 6f185526..addb4cb1 100644 --- a/pipeline/data_store.py +++ b/pipeline/data_store.py @@ -12,6 +12,7 @@ from models.image import Image, image_upstreams_association_table from models.source_list import SourceList from models.psf import PSF +from models.background import Background from models.world_coordinates import WorldCoordinates from models.zero_point import ZeroPoint from models.reference import Reference @@ -56,6 +57,7 @@ class DataStore: 'image', 'sources', 'psf', + 'bg', 'wcs', 'zp', 'sub_image', @@ -271,6 +273,7 @@ def __init__(self, *args, **kwargs): self.image = None # single image from one sensor section self.sources = None # extracted sources (a SourceList object, basically a catalog) self.psf = None # psf determined from the extracted sources + self.bg = None # background from the extraction phase self.wcs = None # astrometric solution self.zp = None # photometric calibration self.reference = None # the Reference object needed to make subtractions @@ -365,6 +368,12 @@ def __setattr__(self, key, value): if key 
== 'sources' and not isinstance(value, SourceList): raise ValueError(f'sources must be a SourceList object, got {type(value)}') + if key == 'psf' and not isinstance(value, PSF): + raise ValueError(f'psf must be a PSF object, got {type(value)}') + + if key == 'bg' and not isinstance(value, Background): + raise ValueError(f'bg must be a Background object, got {type(value)}') + if key == 'wcs' and not isinstance(value, WorldCoordinates): raise ValueError(f'WCS must be a WorldCoordinates object, got {type(value)}') @@ -661,7 +670,7 @@ def get_image(self, provenance=None, session=None): sa.select(Image).where( Image.exposure_id == self.exposure_id, Image.section_id == str(self.section_id), - Image.provenance.has(id=provenance.id) + Image.provenance_id == provenance.id, ) ).first() @@ -673,7 +682,7 @@ def append_image_products(self, image): pipeline applications, to make sure the image object has all the data products it needs. """ - for att in ['sources', 'psf', 'wcs', 'zp', 'detections', 'cutouts', 'measurements']: + for att in ['sources', 'psf', 'bg', 'wcs', 'zp', 'detections', 'cutouts', 'measurements']: if getattr(self, att, None) is not None: setattr(image, att, getattr(self, att)) if image.sources is not None: @@ -732,7 +741,7 @@ def get_sources(self, provenance=None, session=None): sa.select(SourceList).where( SourceList.image_id == image.id, SourceList.is_sub.is_(False), - SourceList.provenance.has(id=provenance.id), + SourceList.provenance_id == provenance.id, ) ).first() @@ -749,8 +758,6 @@ def get_psf(self, provenance=None, session=None): the current code version and critical parameters. If none is given, uses the appropriate provenance from the prov_tree dictionary. - If prov_tree is None, will use the latest provenance - for the "extraction" process. Usually the provenance is not given when the psf is loaded in order to be used as an upstream of the current process. session: sqlalchemy.orm.session.Session @@ -786,11 +793,65 @@ def get_psf(self, provenance=None, session=None): image = self.get_image(session=session) if image is not None: self.psf = session.scalars( - sa.select(PSF).where(PSF.image_id == image.id, PSF.provenance.has(id=provenance.id)) + sa.select(PSF).where(PSF.image_id == image.id, PSF.provenance_id == provenance.id) ).first() return self.psf + def get_background(self, provenance=None, session=None): + """Get a Background object, either from memory or from the database. + + Parameters + ---------- + provenance: Provenance object + The provenance to use for the background. + This provenance should be consistent with + the current code version and critical parameters. + If none is given, uses the appropriate provenance + from the prov_tree dictionary. + Usually the provenance is not given when the background is loaded + in order to be used as an upstream of the current process. + session: sqlalchemy.orm.session.Session + An optional session to use for the database query. + If not given, will use the session stored inside the + DataStore object; if there is none, will open a new session + and close it at the end of the function. + + Returns + ------- + bg: Background object + The background object for this image, + or None if no matching background is found. 
+ + """ + process_name = 'extraction' + if provenance is None: # try to get the provenance from the prov_tree + provenance = self._get_provenance_for_an_upstream(process_name, session) + + # if background exists in memory, check the provenance is ok + if self.bg is not None: + # make sure the background object has the correct provenance + if self.bg.provenance is None: + raise ValueError('Background has no provenance!') + if provenance is not None and provenance.id != self.bg.provenance.id: + self.bg = None + + # TODO: do we need to test the b/g Provenance has upstreams consistent with self.image.provenance? + + # not in memory, look for it on the DB + if self.bg is None: + with SmartSession(session, self.session) as session: + image = self.get_image(session=session) + if image is not None: + self.bg = session.scalars( + sa.select(Background).where( + Background.image_id == image.id, + Background.provenance_id == provenance.id, + ) + ).first() + + return self.bg + def get_wcs(self, provenance=None, session=None): """Get an astrometric solution in the form of a WorldCoordinates object, from memory or from the database. @@ -840,7 +901,7 @@ def get_wcs(self, provenance=None, session=None): if sources is not None and sources.id is not None: self.wcs = session.scalars( sa.select(WorldCoordinates).where( - WorldCoordinates.sources_id == sources.id, WorldCoordinates.provenance.has(id=provenance.id) + WorldCoordinates.sources_id == sources.id, WorldCoordinates.provenance_id == provenance.id ) ).first() @@ -895,7 +956,7 @@ def get_zp(self, provenance=None, session=None): if sources is not None and sources.id is not None: self.zp = session.scalars( sa.select(ZeroPoint).where( - ZeroPoint.sources_id == sources.id, ZeroPoint.provenance.has(id=provenance.id) + ZeroPoint.sources_id == sources.id, ZeroPoint.provenance_id == provenance.id ) ).first() @@ -1134,7 +1195,7 @@ def get_subtraction(self, provenance=None, session=None): aliased_table.c.upstream_id == image.id, aliased_table.c.downstream_id == Image.id, ) - ).where(Image.provenance.has(id=provenance.id)) + ).where(Image.provenance_id == provenance.id) ).first() if self.sub_image is not None: @@ -1189,7 +1250,7 @@ def get_detections(self, provenance=None, session=None): sa.select(SourceList).where( SourceList.image_id == sub_image.id, SourceList.is_sub.is_(True), - SourceList.provenance.has(id=provenance.id), + SourceList.provenance_id == provenance.id, ) ).first() @@ -1234,7 +1295,7 @@ def get_cutouts(self, provenance=None, session=None): if self.cutouts[0].provenance is None: raise ValueError('Cutouts have no provenance!') if provenance is not None and provenance.id != self.cutouts[0].provenance.id: - self.detections = None + self.cutouts = None # not in memory, look for it on the DB if self.cutouts is None: @@ -1253,7 +1314,7 @@ def get_cutouts(self, provenance=None, session=None): self.cutouts = session.scalars( sa.select(Cutouts).where( Cutouts.sources_id == sub_image.sources.id, - Cutouts.provenance.has(id=provenance.id), + Cutouts.provenance_id == provenance.id, ) ).all() @@ -1304,7 +1365,7 @@ def get_measurements(self, provenance=None, session=None): self.measurements = session.scalars( sa.select(Measurements).where( Measurements.cutouts_id.in_(cutout_ids), - Measurements.provenance.has(id=provenance.id), + Measurements.provenance_id == provenance.id, ) ).all() @@ -1335,7 +1396,7 @@ def get_all_data_products(self, output='dict', omit_exposure=False): no nested). Any None values will be removed. 
""" attributes = [] if omit_exposure else [ '_exposure' ] - attributes.extend( [ 'image', 'wcs', 'sources', 'psf', 'zp', 'sub_image', + attributes.extend( [ 'image', 'wcs', 'sources', 'psf', 'bg', 'zp', 'sub_image', 'detections', 'cutouts', 'measurements' ] ) result = {att: getattr(self, att) for att in attributes} if output == 'dict': @@ -1430,7 +1491,7 @@ def save_and_commit(self, exists_ok=False, overwrite=True, no_archive=False, continue SCLogger.debug( f'save_and_commit considering a {obj.__class__.__name__} with filepath ' - f'{obj.filepath if isinstance(obj,FileOnDiskMixin) else ""}' ) + f'{obj.filepath if isinstance(obj,FileOnDiskMixin) else ""}' ) if isinstance(obj, FileOnDiskMixin): mustsave = True @@ -1473,7 +1534,7 @@ def save_and_commit(self, exists_ok=False, overwrite=True, no_archive=False, with SmartSession(session, self.session) as session: if self.image is not None: self.image = self.image.merge_all(session) - for att in ['sources', 'psf', 'wcs', 'zp']: + for att in ['sources', 'psf', 'bg', 'wcs', 'zp']: setattr(self, att, None) # avoid automatically appending to the image self's non-merged products for att in ['exposure', 'sources', 'psf', 'wcs', 'zp']: if getattr(self.image, att, None) is not None: @@ -1485,13 +1546,14 @@ def save_and_commit(self, exists_ok=False, overwrite=True, no_archive=False, if self.image_id is None and self.image is not None: self.image_id = self.image.id - self.psf = self.image.psf self.sources = self.image.sources + self.psf = self.image.psf + self.bg = self.image.bg self.wcs = self.image.wcs self.zp = self.image.zp session.commit() - self.products_committed = 'image, sources, psf, wcs, zp' + self.products_committed = 'image, sources, psf, wcs, zp, bg' if self.sub_image is not None: if self.reference is not None: @@ -1501,17 +1563,20 @@ def save_and_commit(self, exists_ok=False, overwrite=True, no_archive=False, self.sub_image.ref_image.id = self.sub_image.ref_image_id self.detections = self.sub_image.sources - session.commit() - self.products_committed += ', sub_image' + session.commit() + self.products_committed += ', sub_image' if self.detections is not None: + more_products = 'detections' if self.cutouts is not None: if self.measurements is not None: # keep track of which cutouts goes to which measurements for m in self.measurements: - m._cutouts_list_index = self.cutouts.index(m.cutouts) + idx = [c.index_in_sources for c in self.cutouts].index(m.cutouts.index_in_sources) + m._cutouts_list_index = idx for cutout in self.cutouts: cutout.sources = self.detections self.cutouts = Cutouts.merge_list(self.cutouts, session) + more_products += ', cutouts' if self.measurements is not None: for i, m in enumerate(self.measurements): @@ -1520,9 +1585,10 @@ def save_and_commit(self, exists_ok=False, overwrite=True, no_archive=False, self.measurements[i].associate_object(session) self.measurements[i] = session.merge(self.measurements[i]) self.measurements[i].object.measurements.append(self.measurements[i]) + more_products += ', measurements' - session.commit() - self.products_committed += ', detections, cutouts, measurements' + session.commit() + self.products_committed += ', ' + more_products def delete_everything(self, session=None, commit=True): """Delete everything associated with this sub-image. 
diff --git a/pipeline/detection.py b/pipeline/detection.py index f7f8629f..85307c7e 100644 --- a/pipeline/detection.py +++ b/pipeline/detection.py @@ -3,6 +3,7 @@ import random import subprocess import time +import warnings import numpy as np import numpy.lib.recfunctions as rfn @@ -23,8 +24,9 @@ from models.base import FileOnDiskMixin, CODE_ROOT from models.image import Image -from models.psf import PSF from models.source_list import SourceList +from models.psf import PSF +from models.background import Background from improc.tools import sigma_clipping @@ -47,33 +49,72 @@ def __init__(self, **kwargs): critical=True ) - self.psf = self.add_par( - 'psf', - None, - ( PSF, int, None ), - 'Use this PSF; pass the PSF object, or its integer id. ' - 'If None, will not do PSF photometry. Ignored if measure_psf is True.' , + self.background_format = self.add_par( + 'background_format', + 'map', + str, + 'Format of the background; one of "map", "scalar", or "polynomial".', + critical=True + ) + + self.background_order = self.add_par( + 'background_order', + 2, + int, + 'Order of the polynomial background. Ignored unless background is "polynomial".', + critical=True + ) + + self.background_method = self.add_par( + 'background_method', + 'sep', + str, + 'Method to use for background subtraction; currently only "sep" is supported.', + critical=True + ) + + self.background_box_size = self.add_par( + 'background_box_size', + 128, + int, + 'Size of the box to use for background estimation in sep.', + critical=True + ) + + self.background_filt_size = self.add_par( + 'background_filt_size', + 3, + int, + 'Size of the filter to use for background estimation in sep.', critical=True ) self.apers = self.add_par( 'apers', - None, - ( None, list ), - 'Apertures in which to measure photometry; a list of floats or None', + [1.0, 2.0, 3.0, 5.0], + list, + 'Apertures in which to measure photometry; a list of floats. ', critical=True ) self.add_alias( 'apertures', 'apers' ) self.inf_aper_num = self.add_par( 'inf_aper_num', - None, - ( None, int ), - ( 'Which of apers is the one to use as the "infinite" aperture for aperture corrections; ' - 'default is to use the last one. Ignored if self.apers is None.' ), + -1, + int, + 'Which of apers is the one to use as the "infinite" aperture for aperture corrections. ' + 'If -1, will use the last aperture, not the PSF flux! ', critical=True ) + self.best_aper_num = self.add_par( + 'best_aper_num', + 0, + int, + 'Which of apers is the one to use as the "best" aperture, for things like plotting or calculating' + 'the limiting magnitude. Note that -1 will use the PSF flux, not the last aperture on the list. ' + ) + self.aperunit = self.add_par( 'aperunit', 'fwhm', @@ -307,6 +348,7 @@ def run(self, *args, **kwargs): raise ValueError(f'Cannot find an image corresponding to the datastore inputs: {ds.get_inputs()}') sources, psf, bkg, bkgsig = self.extract_sources( image ) + sources.image = image if sources.provenance is None: sources.provenance = prov @@ -319,14 +361,11 @@ def run(self, *args, **kwargs): psf.provenance = prov else: if psf.provenance.id != prov.id: - raise ValueError('Provenance mismatch for pfs and provenance!') + raise ValueError('Provenance mismatch for PSF and extraction provenance!') ds.sources = sources ds.psf = psf ds.image.fwhm_estimate = psf.fwhm_pixels # TODO: should we only write if the property is None? 
- if self.has_recalculated: - ds.image.bkg_mean_estimate = float( bkg ) - ds.image.bkg_rms_estimate = float( bkgsig ) ds.runtimes['extraction'] = time.perf_counter() - t_start if parse_bool(os.getenv('SEECHANGE_TRACEMALLOC')): @@ -339,7 +378,25 @@ def run(self, *args, **kwargs): return ds def extract_sources(self, image): - """Calls one of the extraction methods, based on self.pars.method. """ + """Calls one of the extraction methods, based on self.pars.method. + + Parameters + ---------- + image: Image + The Image object from which to extract sources. + + Returns + ------- + sources: SourceList object + A list of sources with their positions and fluxes. + psf: PSF object + An estimate for the point spread function of the image. + bkg: float + An estimate for the mean value of the background of the image. + bkgsig: float + An estimate for the standard deviation of the background of the image. + + """ sources = None psf = None bkg = None @@ -348,8 +405,7 @@ def extract_sources(self, image): sources = self.extract_sources_sep(image) elif self.pars.method == 'sextractor': if self.pars.subtraction: - psffile = None if self.pars.psf is None else self.pars.psf.get_fullpath() - sources, _, _, _ = self.extract_sources_sextractor(image, psffile=psffile) + sources, _, _, _ = self.extract_sources_sextractor(image, psffile=None) else: sources, psf, bkg, bkgsig = self.extract_sources_sextractor(image) elif self.pars.method == 'filter': @@ -360,14 +416,10 @@ def extract_sources(self, image): else: raise ValueError(f'Unknown extraction method "{self.pars.method}"') - if psf is not None: - if psf._upstream_bitflag is None: - psf._upstream_bitflag = 0 - psf._upstream_bitflag |= image.bitflag if sources is not None: - if sources._upstream_bitflag is None: - sources._upstream_bitflag = 0 sources._upstream_bitflag |= image.bitflag + if psf is not None: + psf._upstream_bitflag |= image.bitflag return sources, psf, bkg, bkgsig @@ -378,14 +430,7 @@ def extract_sources_sextractor( self, image, psffile=None ): psfxmlpath = None try: # cleanup at the end - if self.pars.apers is None: - apers = np.array( image.instrument_object.standard_apertures() ) - inf_aper_num = image.instrument_object.fiducial_aperture() - else: - apers = self.pars.apers - inf_aper_num = self.pars.inf_aper_num - if inf_aper_num is None: - inf_aper_num = len( apers ) - 1 + apers = np.array(self.pars.apers) if self.pars.measure_psf: # Run sextractor once without a psf to get objects from @@ -408,9 +453,6 @@ def extract_sources_sextractor( self, image, psffile=None ): psf = self._run_psfex( tempnamebase, image, do_not_cleanup=True ) psfpath = pathlib.Path( FileOnDiskMixin.temp_path ) / f'{tempnamebase}.sources.psf' psfxmlpath = pathlib.Path( FileOnDiskMixin.temp_path ) / f'{tempnamebase}.sources.psf.xml' - elif self.pars.psf is not None: - psf = self.pars.psf - psfpath, psfxmlpath = psf.get_fullpath() else: psf = None @@ -424,28 +466,43 @@ def extract_sources_sextractor( self, image, psffile=None ): # Now that we have a psf, run sextractor (maybe a second time) # to get the actual measurements. 
SCLogger.debug( "detection: running sextractor with psf to get final source list" ) - sources, bkg, bkgsig = self._run_sextractor_once( image, apers=apers, - psffile=psfpath, tempname=tempnamebase ) + + if psf is not None: + psf_clip = psf.get_clip() + psf_norm = 1 / np.sqrt(np.sum(psf_clip ** 2)) # normalization factor for the sextractor thresholds + else: # we don't have a psf for some reason, use the "good enough" approximation + psf_norm = 3.0 + + sources, bkg, bkgsig = self._run_sextractor_once( + image, + apers=apers, + psffile=psfpath, + psfnorm=psf_norm, + tempname=tempnamebase, + ) SCLogger.debug( f"detection: sextractor found {len(sources.data)} sources" ) snr = sources.apfluxadu()[0] / sources.apfluxadu()[1] if snr.min() > self.pars.threshold: - SCLogger.warning( "SExtractor may not have detected everything down to your threshold." ) + warnings.warn( "SExtractor may not have detected everything down to your threshold." ) w = np.where( snr >= self.pars.threshold ) sources.data = sources.data[w] sources.num_sources = len( sources.data ) - sources._inf_aper_num = inf_aper_num + sources.inf_aper_num = self.pars.inf_aper_num + sources.best_aper_num = self.pars.best_aper_num finally: # Clean up the temporary files created (that weren't already cleaned up by _run_sextractor_once) sourcepath.unlink( missing_ok=True ) - if ( psffile is None ) and ( self.pars.psf is None ): - if psfpath is not None: psfpath.unlink( missing_ok=True ) - if psfxmlpath is not None: psfxmlpath.unlink( missing_ok=True ) + if psffile is None: + if psfpath is not None: + psfpath.unlink( missing_ok=True ) + if psfxmlpath is not None: + psfxmlpath.unlink( missing_ok=True ) return sources, psf, bkg, bkgsig - def _run_sextractor_once( self, image, apers=[5, ], psffile=None, tempname=None, do_not_cleanup=False ): + def _run_sextractor_once(self, image, apers=[5, ], psffile=None, psfnorm=3.0, tempname=None, do_not_cleanup=False): """Extract a SourceList from a FITS image using SExtractor. This function should not be called from outside this class. @@ -463,6 +520,12 @@ def _run_sextractor_once( self, image, apers=[5, ], psffile=None, tempname=None, File that has the PSF to use for PSF photometry. If None, won't do psf photometry. + psfnorm: float + The normalization of the PSF image (i.e., the sqrt of the + sum of squares of the psf values). This is used to set the + threshold for sextractor. When the PSF is not known, we + will use a rough approximation and set this value to 3.0. + tempname: str If not None, a filename base for where the catalog will be written. The source file will be written to @@ -619,8 +682,8 @@ def _run_sextractor_once( self, image, apers=[5, ], psffile=None, tempname=None, "-XML_NAME", tmpxml, "-PARAMETERS_NAME", paramfile, "-THRESH_TYPE", "RELATIVE", - "-DETECT_THRESH", str( self.pars.threshold / 3. ), - "-ANALYSIS_THRESH", str( self.pars.threshold / 3. ), + "-DETECT_THRESH", str( self.pars.threshold / psfnorm ), + "-ANALYSIS_THRESH", str( self.pars.threshold / psfnorm ), "-FILTER", "Y", "-FILTER_NAME", str(conv), "-WEIGHT_TYPE", "MAP_WEIGHT", @@ -631,7 +694,7 @@ def _run_sextractor_once( self, image, apers=[5, ], psffile=None, tempname=None, "-FLAG_TYPE", "OR", "-PHOT_APERTURES", ",".join( [ str(a*2.) 
for a in apers ] ), "-SATUR_LEVEL", str( image.instrument_object.average_saturation_limit( image ) ), - "-GAIN", "1.0", + "-GAIN", "1.0", # TODO: we should probably put the instrument gain here "-STARNNW_NAME", nnw, "-BACK_TYPE", "AUTO", "-BACK_SIZE", str( image.instrument_object.background_box_size ), diff --git a/pipeline/measuring.py b/pipeline/measuring.py index 33566609..aefedccf 100644 --- a/pipeline/measuring.py +++ b/pipeline/measuring.py @@ -39,15 +39,11 @@ def __init__(self, **kwargs): 'adjust the annulus size for each image based on the PSF width. ' ) - # TODO: should we choose the "best aperture" using the config, or should each Image have its own aperture? - self.chosen_aperture = self.add_par( - 'chosen_aperture', - 0, - [str, int], - 'The aperture radius that is used for photometry. ' - 'Choose either the index in the aperture_radii list, ' - 'the string "psf", or the string "auto" to choose ' - 'the best aperture in each image separately. ' + self.use_annulus_for_centroids = self.add_par( + 'use_annulus_for_centroids', + True, + bool, + 'Use the local background measurements via an annulus to adjust the centroids and second moments. ' ) self.analytical_cuts = self.add_par( @@ -217,6 +213,7 @@ def run(self, *args, **kwargs): # make sure to remember which cutout belongs to this measurement, # before either of them is in the DB and then use the cutouts_id instead m._cutouts_list_index = i + m.best_aperture = c.sources.best_aper_num m.aper_radii = c.sources.image.new_image.zp.aper_cor_radii # zero point corrected aperture radii @@ -239,14 +236,16 @@ def run(self, *args, **kwargs): flags, radii=m.aper_radii, annulus=annulus_radii_pixels, + local_bg=self.pars.use_annulus_for_centroids, ) m.flux_apertures = output['fluxes'] m.flux_apertures_err = [np.sqrt(output['variance']) * norm for norm in output['normalizations']] m.aper_radii = output['radii'] m.area_apertures = output['areas'] - m.background = output['background'] - m.background_err = np.sqrt(output['variance']) + m.bkg_mean = output['background'] + m.bkg_std = np.sqrt(output['variance']) + m.bkg_pix = output['n_pix_bg'] m.offset_x = output['offset_x'] m.offset_y = output['offset_y'] m.width = (output['major'] + output['minor']) / 2 @@ -288,29 +287,16 @@ def run(self, *args, **kwargs): m.flux_psf_err = fluxerr m.area_psf = area - # decide on the "best" aperture - if self.pars.chosen_aperture == 'auto': - raise NotImplementedError('Automatic aperture selection is not yet implemented.') - if self.pars.chosen_aperture == 'psf': - ap_index = -1 - elif isinstance(self.pars.chosen_aperture, int): - ap_index = self.pars.chosen_aperture - else: - raise ValueError( - f'Invalid value "{self.pars.chosen_aperture}" for chosen_aperture in the measuring parameters.' - ) - m.best_aperture = ap_index - # update the provenance m.provenance = prov m.provenance_id = prov.id # Apply analytic cuts to each stamp image, to rule out artefacts. m.disqualifier_scores = {} - if m.background != 0 and m.background_err > 0.1: - norm_data = (c.sub_nandata - m.background) / m.background_err # normalize + if m.bkg_mean != 0 and m.bkg_std > 0.1: + norm_data = (c.sub_nandata - m.bkg_mean) / m.bkg_std # normalize else: - warnings.warn(f'Background mean= {m.background}, std= {m.background_err}, normalization skipped!') + warnings.warn(f'Background mean= {m.bkg_mean}, std= {m.bkg_std}, normalization skipped!') norm_data = c.sub_nandata # no good background measurement, do not normalize! 
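# the analytic cuts below count outlier pixels in norm_data, which (when the background is good) is the subtraction cutout in units of the local background sigma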
positives = np.sum(norm_data > self.pars.outlier_sigma) diff --git a/pipeline/subtraction.py b/pipeline/subtraction.py index 357efa2c..f66318bc 100644 --- a/pipeline/subtraction.py +++ b/pipeline/subtraction.py @@ -155,21 +155,12 @@ def _subtract_zogy(self, new_image, ref_image): ref_image_data = ref_image.data new_image_psf = new_image.psf.get_clip() ref_image_psf = ref_image.psf.get_clip() - new_image_noise = new_image.bkg_rms_estimate # TOOD: improve this by using a Background object? - ref_image_noise = 1.0 # proper coaddition images have noise=1.0 by construction + new_image_noise = new_image.bkg_rms_estimate + ref_image_noise = ref_image.bkg_rms_estimate new_image_flux_zp = 10 ** (0.4 * new_image.zp.zp) ref_image_flux_zp = 10 ** (0.4 * ref_image.zp.zp) # TODO: consider adding an estimate for the astrometric uncertainty dx, dy - # do some additional processing of the new image - mu, sigma = sigma_clipping(new_image_data) - new_image_data = (new_image_data - mu) / sigma # TODO: skip this if we already background subtracted - if new_image_noise is not None: - new_image_noise = new_image_noise / sigma - else: - new_image_noise = 1.0 # TODO: this can go away after we verify images always have background estimates! - new_image_flux_zp = new_image_flux_zp / sigma - new_image_data = self.inpainter.run(new_image_data, new_image.flags, new_image.weight) output = zogy_subtract( diff --git a/pipeline/top_level.py b/pipeline/top_level.py index aecd0689..9ed8ba2a 100644 --- a/pipeline/top_level.py +++ b/pipeline/top_level.py @@ -1,5 +1,6 @@ import os import datetime +import time import warnings import sqlalchemy as sa @@ -7,6 +8,7 @@ from pipeline.parameters import Parameters from pipeline.data_store import DataStore, UPSTREAM_STEPS from pipeline.preprocessing import Preprocessor +from pipeline.backgrounding import Backgrounder from pipeline.astro_cal import AstroCalibrator from pipeline.photo_cal import PhotCalibrator from pipeline.subtraction import Subtractor @@ -33,7 +35,7 @@ 'extraction': { 'sources': 'extractor', 'psf': 'extractor', - 'background': 'extractor', + 'bg': 'backgrounder', 'wcs': 'astrometor', 'zp': 'photometor', }, @@ -52,7 +54,18 @@ def __init__(self, **kwargs): super().__init__() self.example_pipeline_parameter = self.add_par( - 'example_pipeline_parameter', 1, int, 'an example pipeline parameter' + 'example_pipeline_parameter', 1, int, 'an example pipeline parameter', critical=False + ) + + self.save_before_subtraction = self.add_par( + 'save_before_subtraction', + True, + bool, + 'Save intermediate images to the database, ' + 'after doing extraction, background, and astro/photo calibration, ' + 'if there is no reference, will not continue to doing subtraction' + 'but will still save the products up to that point. 
', + critical=False, ) self._enforce_no_new_attrs = True # lock against new parameters @@ -81,6 +94,12 @@ def __init__(self, **kwargs): self.pars.add_defaults_to_dict(extraction_config) self.extractor = Detector(**extraction_config) + # background estimation using either sep or other methods + background_config = self.config.value('extraction.bg', {}) + background_config.update(kwargs.get('extraction', {}).get('bg', {})) + self.pars.add_defaults_to_dict(background_config) + self.backgrounder = Backgrounder(**background_config) + # astrometric fit using a first pass of sextractor and then astrometric fit to Gaia astrometor_config = self.config.value('extraction.wcs', {}) astrometor_config.update(kwargs.get('extraction', {}).get('wcs', {})) @@ -94,8 +113,14 @@ def __init__(self, **kwargs): self.photometor = PhotCalibrator(**photometor_config) # make sure when calling get_critical_pars() these objects will produce the full, nested dictionary - siblings = {'sources': self.extractor.pars, 'wcs': self.astrometor.pars, 'zp': self.photometor.pars} + siblings = { + 'sources': self.extractor.pars, + 'bg': self.backgrounder.pars, + 'wcs': self.astrometor.pars, + 'zp': self.photometor.pars, + } self.extractor.pars.add_siblings(siblings) + self.backgrounder.pars.add_siblings(siblings) self.astrometor.pars.add_siblings(siblings) self.photometor.pars.add_siblings(siblings) @@ -254,7 +279,7 @@ def run(self, *args, **kwargs): with warnings.catch_warnings(record=True) as w: ds.warnings_list = w # appends warning to this list as it goes along # run dark/flat preprocessing, cut out a specific section of the sensor - # TODO: save the results as Image objects to DB and disk? Or save at the end? + SCLogger.info(f"preprocessor") ds = self.preprocessor.run(ds, session) ds.update_report('preprocessing', session) @@ -265,6 +290,11 @@ def run(self, *args, **kwargs): ds = self.extractor.run(ds, session) ds.update_report('extraction', session) + # find the background for this image + SCLogger.info(f"backgrounder for image id {ds.image.id}") + ds = self.backgrounder.run(ds, session) + ds.update_report('extraction', session) + # find astrometric solution, save WCS into Image object and FITS headers SCLogger.info(f"astrometor for image id {ds.image.id}") ds = self.astrometor.run(ds, session) @@ -275,6 +305,19 @@ def run(self, *args, **kwargs): ds = self.photometor.run(ds, session) ds.update_report('extraction', session) + if self.pars.save_before_subtraction: + t_start = time.perf_counter() + try: + SCLogger.info(f"Saving intermediate image for image id {ds.image.id}") + ds.save_and_commit(session=session) + except Exception as e: + ds.update_report('save intermediate', session) + SCLogger.error(f"Failed to save intermediate image for image id {ds.image.id}") + SCLogger.error(e) + raise e + + ds.runtimes['save_intermediate'] = time.perf_counter() - t_start + # fetch reference images and subtract them, save subtracted Image objects to DB and disk SCLogger.info(f"subtractor for image id {ds.image.id}") ds = self.subtractor.run(ds, session) @@ -298,6 +341,8 @@ def run(self, *args, **kwargs): # measure deep learning models on the cutouts/measurements # TODO: add this... + # TODO: add a saving step at the end too? 
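The new save_before_subtraction block above times the intermediate save with time.perf_counter() and records the result in ds.runtimes. A self-contained toy of that bookkeeping pattern, using a stand-in data store (the real DataStore.save_and_commit writes the image, sources, psf, bg, wcs and zp products and takes a session argument).

import time

class FakeDataStore:
    """Stand-in for the pipeline DataStore, only to illustrate the runtime bookkeeping."""
    def __init__(self):
        self.runtimes = {}

    def save_and_commit(self, session=None):
        time.sleep(0.01)  # pretend to write the intermediate products to disk and DB

ds = FakeDataStore()
t_start = time.perf_counter()
ds.save_and_commit()
ds.runtimes['save_intermediate'] = time.perf_counter() - t_start
print(f"save_intermediate took {ds.runtimes['save_intermediate']:.3f} s")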
+ ds.finalize_report(session) return ds diff --git a/requirements.txt b/requirements.txt index cf8a1cb3..8a548f29 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,7 +15,6 @@ pandas==2.1.3 photutils==1.9.0 psutil==5.9.8 psycopg2==2.9.9 -pylandau==2.2.1 pytest==7.4.3 pytest-timestamper==0.0.10 python-dateutil==2.8.2 @@ -59,6 +58,7 @@ wget==3.2 # iniconfig==2.0.0 # kafka-python==2.0.2 # kiwisolver==1.4.4 +# pylandau==2.2.1 # matplotlib==3.7.1 # mpi4py==3.1.4 # msgpack==1.0.5 diff --git a/tests/conftest.py b/tests/conftest.py index 9ff71332..2a63d44d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,7 +4,6 @@ import uuid import shutil import pathlib -import logging import numpy as np diff --git a/tests/fixtures/decam.py b/tests/fixtures/decam.py index 8e70108f..c9925e73 100644 --- a/tests/fixtures/decam.py +++ b/tests/fixtures/decam.py @@ -27,6 +27,7 @@ from util.logger import SCLogger from util.cache import copy_to_cache, copy_list_to_cache, copy_from_cache, copy_list_from_cache + @pytest.fixture(scope='session') def decam_cache_dir(cache_dir): output = os.path.join(cache_dir, 'DECam') @@ -268,7 +269,7 @@ def decam_datastore( decam_exposure, 'N1', cache_dir=decam_cache_dir, - cache_base_name='115/c4d_20221104_074232_N1_g_Sci_VCOACQ', + cache_base_name='115/c4d_20221104_074232_N1_g_Sci_NBXRIO', save_original_image=True ) # This save is redundant, as the datastore_factory calls save_and_commit @@ -327,6 +328,7 @@ def decam_fits_image_filename(download_url, decam_cache_dir): except FileNotFoundError: pass + @pytest.fixture def decam_fits_image_filename2(download_url, decam_cache_dir): download_url = os.path.join(download_url, 'DECAM') @@ -345,6 +347,7 @@ def decam_fits_image_filename2(download_url, decam_cache_dir): except FileNotFoundError: pass + @pytest.fixture def decam_ref_datastore( code_version, download_url, decam_cache_dir, data_dir, datastore_factory ): filebase = 'DECaPS-West_20220112.g.32' diff --git a/tests/fixtures/pipeline_objects.py b/tests/fixtures/pipeline_objects.py index 93be2623..dd48ddae 100644 --- a/tests/fixtures/pipeline_objects.py +++ b/tests/fixtures/pipeline_objects.py @@ -15,6 +15,7 @@ from models.image import Image from models.source_list import SourceList from models.psf import PSF +from models.background import Background from models.world_coordinates import WorldCoordinates from models.zero_point import ZeroPoint from models.cutouts import Cutouts @@ -23,6 +24,7 @@ from pipeline.data_store import DataStore from pipeline.preprocessing import Preprocessor from pipeline.detection import Detector +from pipeline.backgrounding import Backgrounder from pipeline.astro_cal import AstroCalibrator from pipeline.photo_cal import PhotCalibrator from pipeline.coaddition import Coadder, CoaddPipeline @@ -79,6 +81,27 @@ def extractor(extractor_factory): return extractor_factory() +@pytest.fixture(scope='session') +def backgrounder_factory(test_config): + + def make_backgrounder(): + bg = Backgrounder(**test_config.value('extraction.bg')) + bg.pars._enforce_no_new_attrs = False + bg.pars.test_parameter = bg.pars.add_par( + 'test_parameter', 'test_value', str, 'parameter to define unique tests', critical=True + ) + bg.pars._enforce_no_new_attrs = True + + return bg + + return make_backgrounder + + +@pytest.fixture +def backgrounder(backgrounder_factory): + return backgrounder_factory() + + @pytest.fixture(scope='session') def astrometor_factory(test_config): @@ -231,6 +254,7 @@ def measurer(measurer_factory): def pipeline_factory( preprocessor_factory, 
extractor_factory, + backgrounder_factory, astrometor_factory, photometor_factory, subtractor_factory, @@ -243,12 +267,19 @@ def make_pipeline(): p = Pipeline(**test_config.value('pipeline')) p.preprocessor = preprocessor_factory() p.extractor = extractor_factory() + p.backgrounder = backgrounder_factory() p.astrometor = astrometor_factory() p.photometor = photometor_factory() # make sure when calling get_critical_pars() these objects will produce the full, nested dictionary - siblings = {'sources': p.extractor.pars, 'wcs': p.astrometor.pars, 'zp': p.photometor.pars} + siblings = { + 'sources': p.extractor.pars, + 'bg': p.backgrounder.pars, + 'wcs': p.astrometor.pars, + 'zp': p.photometor.pars + } p.extractor.pars.add_siblings(siblings) + p.backgrounder.pars.add_siblings(siblings) p.astrometor.pars.add_siblings(siblings) p.photometor.pars.add_siblings(siblings) @@ -283,8 +314,14 @@ def make_pipeline(): p.photometor = photometor_factory() # make sure when calling get_critical_pars() these objects will produce the full, nested dictionary - siblings = {'sources': p.extractor.pars, 'wcs': p.astrometor.pars, 'zp': p.photometor.pars} + siblings = { + 'sources': p.extractor.pars, + 'bg': p.backgrounder.pars, + 'wcs': p.astrometor.pars, + 'zp': p.photometor.pars, + } p.extractor.pars.add_siblings(siblings) + p.backgrounder.pars.add_siblings(siblings) p.astrometor.pars.add_siblings(siblings) p.photometor.pars.add_siblings(siblings) @@ -483,11 +520,11 @@ def make_datastore( else: raise e # if any other error comes up, raise it - ############# extraction to create sources / PSF / WCS / ZP ############# + ############# extraction to create sources / PSF / BG / WCS / ZP ############# if ( ( not os.getenv( "LIMIT_CACHE_USAGE" ) ) and ( cache_dir is not None ) and ( cache_base_name is not None ) ): - # try to get the SourceList, PSF, WCS and ZP from cache + # try to get the SourceList, PSF, BG, WCS and ZP from cache prov = Provenance( code_version=code_version, process='extraction', @@ -553,7 +590,35 @@ def make_datastore( # make sure this is saved to the archive as well ds.psf.save(verify_md5=False, overwrite=True) - ############## astro_cal to create wcs ################ + # try to get the background from cache + cache_name = f'{cache_base_name}.bg_{prov.id[:6]}.h5.json' + bg_cache_path = os.path.join(cache_dir, cache_name) + if os.path.isfile(bg_cache_path): + SCLogger.debug('loading background from cache. 
') + ds.bg = copy_from_cache(Background, cache_dir, cache_name) + + # if BG already exists on the database, use that instead of this one + existing = session.scalars( + sa.select(Background).where(Background.filepath == ds.bg.filepath) + ).first() + if existing is not None: + # overwrite the existing row data using the JSON cache file + for key in sa.inspect(ds.bg).mapper.columns.keys(): + value = getattr(ds.bg, key) + if ( + key not in ['id', 'image_id', 'created_at', 'modified'] and + value is not None + ): + setattr(existing, key, value) + ds.bg = existing + + ds.bg.provenance = prov + ds.bg.image = ds.image + + # make sure this is saved to the archive as well + ds.bg.save(verify_md5=False, overwrite=True) + + # try to get the WCS from cache cache_name = f'{cache_base_name}.wcs_{prov.id[:6]}.txt.json' wcs_cache_path = os.path.join(cache_dir, cache_name) if os.path.isfile(wcs_cache_path): @@ -588,8 +653,7 @@ def make_datastore( # make sure this is saved to the archive as well ds.wcs.save(verify_md5=False, overwrite=True) - ########### photo_cal to create zero point ############ - + # try to get the ZP from cache cache_name = cache_base_name + '.zp.json' zp_cache_path = os.path.join(cache_dir, cache_name) if os.path.isfile(zp_cache_path): @@ -621,11 +685,12 @@ def make_datastore( ds.zp.provenance = prov ds.zp.sources = ds.sources - if ds.sources is None or ds.psf is None or ds.wcs is None or ds.zp is None: # redo extraction + # if any data product is missing, must redo the extraction step + if ds.sources is None or ds.psf is None or ds.bg is None or ds.wcs is None or ds.zp is None: SCLogger.debug('extracting sources. ') ds = p.extractor.run(ds, session) - ds.sources.save() + ds.sources.save(overwrite=True) if cache_dir is not None and cache_base_name is not None: output_path = copy_to_cache(ds.sources, cache_dir) if cache_dir is not None and cache_base_name is not None and output_path != sources_cache_path: @@ -637,9 +702,18 @@ def make_datastore( if cache_dir is not None and cache_base_name is not None and output_path != psf_cache_path: warnings.warn(f'cache path {psf_cache_path} does not match output path {output_path}') + SCLogger.debug('Running background estimation') + ds = p.backgrounder.run(ds, session) + + ds.bg.save(overwrite=True) + if cache_dir is not None and cache_base_name is not None: + output_path = copy_to_cache(ds.bg, cache_dir) + if cache_dir is not None and cache_base_name is not None and output_path != bg_cache_path: + warnings.warn(f'cache path {bg_cache_path} does not match output path {output_path}') + SCLogger.debug('Running astrometric calibration') ds = p.astrometor.run(ds, session) - ds.wcs.save() + ds.wcs.save(overwrite=True) if ((cache_dir is not None) and (cache_base_name is not None) and (not os.getenv("LIMIT_CACHE_USAGE"))): output_path = copy_to_cache(ds.wcs, cache_dir) @@ -726,9 +800,7 @@ def make_datastore( parameters=prov_aligned_ref.parameters, upstreams=[ ds.image.provenance, - ds.sources.provenance, # this also includes the PSF's provenance - ds.wcs.provenance, - ds.zp.provenance, + ds.sources.provenance, # this also includes provs for PSF, BG, WCS, ZP ], process='alignment', is_testing=True, diff --git a/tests/fixtures/ptf.py b/tests/fixtures/ptf.py index 1fb671da..5374c04d 100644 --- a/tests/fixtures/ptf.py +++ b/tests/fixtures/ptf.py @@ -20,6 +20,7 @@ from models.image import Image from models.source_list import SourceList from models.psf import PSF +from models.background import Background from models.world_coordinates import WorldCoordinates 
from models.zero_point import ZeroPoint from models.reference import Reference @@ -172,7 +173,7 @@ def ptf_datastore(datastore_factory, ptf_exposure, ptf_ref, ptf_cache_dir, ptf_b ptf_exposure, 11, cache_dir=ptf_cache_dir, - cache_base_name='187/PTF_20110429_040004_11_R_Sci_QTD4UW', + cache_base_name='187/PTF_20110429_040004_11_R_Sci_BNKEKA', overrides={'extraction': {'threshold': 5}}, bad_pixel_map=ptf_bad_pixel_map, ) @@ -329,9 +330,10 @@ def ptf_aligned_images(request, ptf_cache_dir, data_dir, code_version): filenames = f.read().splitlines() output_images = [] for filename in filenames: - imfile, psffile = filename.split() + imfile, psffile, bgfile = filename.split() output_images.append(copy_from_cache(Image, cache_dir, imfile + '.image.fits')) output_images[-1].psf = copy_from_cache(PSF, cache_dir, psffile + '.fits') + output_images[-1].bg = copy_from_cache(Background, cache_dir, bgfile) output_images[-1].zp = copy_from_cache(ZeroPoint, cache_dir, imfile + '.zp') else: # no cache available ptf_reference_images = request.getfixturevalue('ptf_reference_images') @@ -351,22 +353,28 @@ def ptf_aligned_images(request, ptf_cache_dir, data_dir, code_version): filenames = [] psf_paths = [] + bg_paths = [] + # there's an implicit call to Image._make_aligned_images() here for image in coadd_image.aligned_images: image.save() filepath = copy_to_cache(image, cache_dir) if image.psf.filepath is None: # save only PSF objects that haven't been saved yet image.psf.save(overwrite=True) + if image.bg.filepath is None: # save only Background objects that haven't been saved yet + image.bg.save(overwrite=True) if not os.getenv( "LIMIT_CACHE_USAGE" ): - copy_to_cache(image.psf, cache_dir) - copy_to_cache(image.zp, cache_dir, filepath=filepath[:-len('.image.fits.json')]+'.zp.json') + copy_to_cache(image.psf, cache_dir) + copy_to_cache(image.bg, cache_dir) + copy_to_cache(image.zp, cache_dir, filepath=filepath[:-len('.image.fits.json')]+'.zp.json') filenames.append(image.filepath) psf_paths.append(image.psf.filepath) + bg_paths.append(image.bg.filepath) if not os.getenv( "LIMIT_CACHE_USAGE" ): os.makedirs(cache_dir, exist_ok=True) with open(os.path.join(cache_dir, 'manifest.txt'), 'w') as f: - for filename, psf_path in zip(filenames, psf_paths): - f.write(f'{filename} {psf_path}\n') + for filename, psf_path, bg_path in zip(filenames, psf_paths, bg_paths): + f.write(f'{filename} {psf_path} {bg_path}\n') output_images = coadd_image.aligned_images @@ -422,7 +430,7 @@ def ptf_ref( is_testing=True, ) - cache_base_name = f'187/PTF_20090405_073932_11_R_ComSci_{im_prov.id[:6]}_u-wswtff' + cache_base_name = f'187/PTF_20090405_073932_11_R_ComSci_{im_prov.id[:6]}_u-iqxrjn' # this provenance is used for sources, psf, wcs, zp sources_prov = Provenance( @@ -434,8 +442,9 @@ def ptf_ref( ) extensions = [ 'image.fits', - f'psf_{sources_prov.id[:6]}.fits', f'sources_{sources_prov.id[:6]}.fits', + f'psf_{sources_prov.id[:6]}.fits', + f'bg_{sources_prov.id[:6]}.h5', f'wcs_{sources_prov.id[:6]}.txt', 'zp' ] @@ -452,11 +461,6 @@ def ptf_ref( coadd_image.ref_image_id = ptf_reference_images[-1].id # make sure to replace the ID with the new DB value assert coadd_image.provenance_id == coadd_image.provenance.id - # get the PSF: - coadd_image.psf = copy_from_cache(PSF, ptf_cache_dir, cache_base_name + f'.psf_{sources_prov.id[:6]}.fits') - coadd_image.psf.provenance = sources_prov - assert coadd_image.psf.provenance_id == coadd_image.psf.provenance.id - # get the source list: coadd_image.sources = copy_from_cache( SourceList, 
ptf_cache_dir, cache_base_name + f'.sources_{sources_prov.id[:6]}.fits' @@ -464,6 +468,16 @@ def ptf_ref( coadd_image.sources.provenance = sources_prov assert coadd_image.sources.provenance_id == coadd_image.sources.provenance.id + # get the PSF: + coadd_image.psf = copy_from_cache(PSF, ptf_cache_dir, cache_base_name + f'.psf_{sources_prov.id[:6]}.fits') + coadd_image.psf.provenance = sources_prov + assert coadd_image.psf.provenance_id == coadd_image.psf.provenance.id + + # get the background: + coadd_image.bg = copy_from_cache(Background, ptf_cache_dir, cache_base_name + f'.bg_{sources_prov.id[:6]}.h5') + coadd_image.bg.provenance = sources_prov + assert coadd_image.bg.provenance_id == coadd_image.bg.provenance.id + # get the WCS: coadd_image.wcs = copy_from_cache( WorldCoordinates, ptf_cache_dir, cache_base_name + f'.wcs_{sources_prov.id[:6]}.txt' @@ -491,6 +505,7 @@ def ptf_ref( copy_to_cache(pipe.datastore.image, ptf_cache_dir) copy_to_cache(pipe.datastore.sources, ptf_cache_dir) copy_to_cache(pipe.datastore.psf, ptf_cache_dir) + copy_to_cache(pipe.datastore.bg, ptf_cache_dir) copy_to_cache(pipe.datastore.wcs, ptf_cache_dir) copy_to_cache(pipe.datastore.zp, ptf_cache_dir, cache_base_name + '.zp.json') diff --git a/tests/fixtures/simulated.py b/tests/fixtures/simulated.py index 5d62bc94..2ccf3d31 100644 --- a/tests/fixtures/simulated.py +++ b/tests/fixtures/simulated.py @@ -409,7 +409,7 @@ def sim_image_list( im.zp = ZeroPoint() im.zp.zp = np.random.uniform(25, 30) im.zp.dzp = np.random.uniform(0.01, 0.1) - im.zp.aper_cor_radii = im.instrument_object.standard_apertures() + im.zp.aper_cor_radii = [1.0, 2.0, 3.0, 5.0] im.zp.aper_cors = np.random.normal(0, 0.1, len(im.zp.aper_cor_radii)) im.zp.provenance = provenance_extra im.wcs = WorldCoordinates() diff --git a/tests/improc/test_alignment.py b/tests/improc/test_alignment.py index 8ac5968b..257554a8 100644 --- a/tests/improc/test_alignment.py +++ b/tests/improc/test_alignment.py @@ -35,10 +35,17 @@ def test_warp_decam( decam_datastore, decam_reference ): # Check a couple of spots on the image # First, around a star: assert ds.image.data[ 2223:2237, 545:559 ].sum() == pytest.approx( 58014.1, rel=0.01 ) - assert warped.data[ 2223:2237, 545:559 ].sum() == pytest.approx( 22597.9, rel=0.01 ) - # And a blank spot - assert ds.image.data[ 2243:2257, 575:589 ].sum() == pytest.approx( 35298.6, rel=0.01 ) # sky not subtracted - assert warped.data[ 2243:2257, 575:589 ].sum() == pytest.approx( 971.7, rel=0.01 ) + assert warped.data[ 2223:2237, 545:559 ].sum() == pytest.approx( 21602.75, rel=0.01 ) + + # And a blank spot (here we can do some statistics instead of hard coded values) + num_pix = ds.image.data[2243:2257, 575:589].size + bg_mean = num_pix * ds.image.bg.value + bg_noise = np.sqrt(num_pix) * ds.image.bg.noise + assert abs(ds.image.data[ 2243:2257, 575:589 ].sum() - bg_mean) < bg_noise + + bg_mean = 0 # assume the warped image is background subtracted + bg_noise = np.sqrt(num_pix) * ds.ref_image.bg.noise + assert abs(warped.data[ 2243:2257, 575:589 ].sum() - bg_mean) < bg_noise # Make sure the warped image WCS is about right. 
We don't # expect it to be exactly identical, but it should be very @@ -88,7 +95,7 @@ def test_alignment_in_image( ptf_reference_images, code_version ): aligned = new_image.aligned_images assert new_image.upstream_images == ptf_reference_images assert len(aligned) == len(ptf_reference_images) - assert np.array_equal(aligned[index].data, ptf_reference_images[index].data) + assert np.array_equal(aligned[index].data, ptf_reference_images[index].data_bgsub) ref = ptf_reference_images[index] # check that images are aligned properly @@ -112,7 +119,7 @@ def test_alignment_in_image( ptf_reference_images, code_version ): loaded_image = session.scalars(sa.select(Image).where(Image.id == new_image.id)).first() assert loaded_image is not None assert len(loaded_image.aligned_images) == len(ptf_reference_images) - assert np.array_equal(loaded_image.aligned_images[-1].data, ptf_reference_images[-1].data) + assert np.array_equal(loaded_image.aligned_images[-1].data, ptf_reference_images[-1].data_bgsub) # check that images are aligned properly for image in loaded_image.aligned_images: diff --git a/tests/improc/test_zogy.py b/tests/improc/test_zogy.py index 69aa0365..29ce4006 100644 --- a/tests/improc/test_zogy.py +++ b/tests/improc/test_zogy.py @@ -428,7 +428,8 @@ def test_subtraction_seeing_background(): raise ValueError( f'seeing: ({ref_seeing:.2f}, {new_seeing:.2f}), ' f'background: ({ref_bkg:.2f}, {new_bkg:.2f}), ' - f'expected/measured: ({expected:.3f}, {measured:.3f})' + f'expected/measured: ({expected:.3f}, {measured:.3f}), ' + f'loss: {(expected - measured) / expected:.3f}' ) diff --git a/tests/models/test_background.py b/tests/models/test_background.py new file mode 100644 index 00000000..fc58d3b6 --- /dev/null +++ b/tests/models/test_background.py @@ -0,0 +1,102 @@ +import pytest +import numpy as np +import h5py + +from models.provenance import Provenance +from models.background import Background + + +def test_save_load_backgrounds(decam_raw_image, code_version): + image = decam_raw_image + bg_mean = 3.14 + bg_var = 6.28 + + try: # cleanup at the end + # Create a background object with a scalar model: + b1 = Background( + format='scalar', + method='sep', + image=image, + value=bg_mean, + noise=np.sqrt(bg_var) + ) + + prov = Provenance( + code_version=code_version, + process='extraction', + parameters={'method': 'sep', 'format': 'scalar'}, + upstreams=[image.provenance], + is_testing=True, + ) + + b1.provenance = prov + + b1.save() + + # check the filename contains the provenance hash + assert prov.id[:6] in b1.get_fullpath() + + # check that the file contains what we expect: + with h5py.File(b1.get_fullpath(), 'r') as f: + # check there's a "background" group: + assert 'background' in f + bg = f['background'] + + # check the attributes: + assert bg.attrs['format'] == 'scalar' + assert bg.attrs['method'] == 'sep' + assert bg.attrs['value'] == bg_mean + assert bg.attrs['noise'] == np.sqrt(bg_var) + + # make a new background with some data: + b2 = Background( + format='map', + method='sep', + image=image, + value=bg_mean, + noise=np.sqrt(bg_var), + counts=np.random.normal(bg_mean, 1, size=(10, 10)), + variance=np.random.normal(bg_var, 1, size=(10, 10)), + ) + + prov = Provenance( + code_version=code_version, + process='extraction', + parameters={'method': 'sep', 'format': 'map'}, + upstreams=[image.provenance], + is_testing=True, + ) + + b2.provenance = prov + + with pytest.raises(RuntimeError, match='Counts shape .* does not match image shape .*'): + b2.save() + + b2.counts = 
np.random.normal(bg_mean, 1, size=image.data.shape) + b2.variance = np.random.normal(bg_var, 1, size=image.data.shape) + b2.save() + + # check the filename contains the provenance hash + assert prov.id[:6] in b2.get_fullpath() + + # check that the file contains what we expect: + with h5py.File(b2.get_fullpath(), 'r') as f: + # check there's a "background" group: + assert 'background' in f + bg = f['background'] + + # check the attributes: + assert bg.attrs['format'] == 'map' + assert bg.attrs['method'] == 'sep' + assert bg.attrs['value'] == bg_mean + assert bg.attrs['noise'] == np.sqrt(bg_var) + + # check the data: + assert np.allclose(bg['counts'], b2.counts) + assert np.allclose(bg['variance'], b2.variance) + + finally: + if 'b1' in locals(): + b1.delete_from_disk_and_database() + if 'b2' in locals(): + b2.delete_from_disk_and_database() diff --git a/tests/models/test_measurements.py b/tests/models/test_measurements.py index 430598ab..116962a7 100644 --- a/tests/models/test_measurements.py +++ b/tests/models/test_measurements.py @@ -13,8 +13,9 @@ from models.measurements import Measurements -def test_measurements_attributes(measurer, ptf_datastore): +def test_measurements_attributes(measurer, ptf_datastore, test_config): + aper_radii = test_config.value('extraction.sources.apertures') ds = measurer.run(ptf_datastore.cutouts) # check that the measurer actually loaded the measurements from db, and not recalculated assert len(ds.measurements) <= len(ds.cutouts) # not all cutouts have saved measurements @@ -28,7 +29,7 @@ def test_measurements_attributes(measurer, ptf_datastore): assert np.allclose(m.aper_radii, new_im.zp.aper_cor_radii) assert np.allclose( new_im.zp.aper_cor_radii, - new_im.psf.fwhm_pixels * np.array(new_im.instrument_object.standard_apertures()), + new_im.psf.fwhm_pixels * np.array(aper_radii), ) assert m.mjd == new_im.mjd assert m.exp_time == new_im.exp_time @@ -37,12 +38,24 @@ def test_measurements_attributes(measurer, ptf_datastore): original_flux = m.flux_apertures[m.best_aperture] # set the flux temporarily to something positive - m.flux_apertures[m.best_aperture] = 1000 - assert m.magnitude == -2.5 * np.log10(1000) + new_im.zp.zp + new_im.zp.aper_cors[m.best_aperture] + m.flux_apertures[0] = 1000 + assert m.mag_apertures[0] == -2.5 * np.log10(1000) + new_im.zp.zp + new_im.zp.aper_cors[0] + + m.flux_psf = 1000 + expected_mag = -2.5 * np.log10(1000) + new_im.zp.zp + assert m.mag_psf == expected_mag # set the flux temporarily to something negative - m.flux_apertures[m.best_aperture] = -1000 - assert np.isnan(m.magnitude) + m.flux_apertures[0] = -1000 + assert np.isnan(m.mag_apertures[0]) + + # check that background is subtracted from the "flux" and "magnitude" properties + if m.best_aperture == -1: + assert m.flux == m.flux_psf - m.bkg_mean * m.area_psf + assert m.magnitude > m.mag_psf # the magnitude has background subtracted from it + assert m.magnitude_err > m.mag_psf_err # the magnitude error is larger because of the error in background + else: + assert m.flux == m.flux_apertures[m.best_aperture] - m.bkg_mean * m.area_apertures[m.best_aperture] # set the flux and zero point to some randomly chosen values and test the distribution of the magnitude: fiducial_zp = new_im.zp.zp @@ -134,7 +147,7 @@ def test_filtering_measurements(ptf_datastore): )).all() assert len(ms) == len(measurements) # all measurements have the same filter - ms = session.scalars(sa.select(Measurements).where(Measurements.background > 0)).all() + ms = 
session.scalars(sa.select(Measurements).where(Measurements.bkg_mean > 0)).all() assert len(ms) <= len(measurements) # only some of the measurements have positive background ms = session.scalars(sa.select(Measurements).where( diff --git a/tests/models/test_objects.py b/tests/models/test_objects.py index f52807d7..192deae6 100644 --- a/tests/models/test_objects.py +++ b/tests/models/test_objects.py @@ -37,7 +37,7 @@ def test_lightcurves_from_measurements(sim_lightcurves): measured_flux = [] for m in lc: - measured_flux.append(m.flux_apertures[3] - m.background * m.area_apertures[3]) + measured_flux.append(m.flux_apertures[3] - m.bkg_mean * m.area_apertures[3]) expected_flux.append(m.sources.data['flux'][m.cutouts.index_in_sources]) expected_error.append(m.sources.data['flux_err'][m.cutouts.index_in_sources]) diff --git a/tests/models/test_provenance.py b/tests/models/test_provenance.py index 615c3318..f9c82042 100644 --- a/tests/models/test_provenance.py +++ b/tests/models/test_provenance.py @@ -2,6 +2,7 @@ import uuid import sqlalchemy as sa +from sqlalchemy.orm.exc import DetachedInstanceError from models.base import SmartSession from models.provenance import CodeHash, CodeVersion, Provenance @@ -314,3 +315,68 @@ def check_in_session( sess, obj ): if 'p4' in locals(): session.execute(sa.delete(Provenance).where(Provenance.id == p4.id)) session.commit() + + +def test_eager_load_upstreams( provenance_base ): + try: + with SmartSession() as session: + provenance_base = session.merge( provenance_base ) + p1 = Provenance( + process="test_process_1", + code_version=provenance_base.code_version, + parameters={'test_parameter': 'test_value'}, + upstreams=[ provenance_base ], + is_testing=True + ) + + p2 = Provenance( + process="test_process_2", + code_version=provenance_base.code_version, + parameters={'test_parameter': 'test_value'}, + upstreams=[ p1 ], + is_testing=True + ) + + p3 = Provenance( + process="test_process_3", + code_version=provenance_base.code_version, + parameters={'test_parameter': 'test_value'}, + upstreams=[ p2 ], + is_testing=True + ) + + p4 = Provenance( + process="test_process_4", + code_version=provenance_base.code_version, + parameters={'test_parameter': 'test_value'}, + upstreams=[ p3 ], + is_testing=True + ) + + session.add_all( [ p1, p2, p3, p4 ] ) + session.commit() + + # Now, in another session.... 
+ with SmartSession() as session2: + p4 = session2.scalars(sa.select(Provenance).where(Provenance.id == p4.id)).first() + + # we are out of the session, so loading of upstream relationships is only for those eager loaded ones + assert len(p4.upstreams) == 1 # should be ok + assert len(p4.upstreams[0].upstreams) == 1 # this should also be ok + assert len(p4.upstreams[0].upstreams[0].upstreams) == 1 # this should also be ok, assuming join_depth=3 + + with pytest.raises(DetachedInstanceError): + p4.upstreams[0].upstreams[0].upstreams[0].upstreams # this should fail, as the join_depth is not enough + + finally: + with SmartSession() as session: + if 'p1' in locals(): + session.execute(sa.delete(Provenance).where(Provenance.id == p1.id)) + if 'p2' in locals(): + session.execute(sa.delete(Provenance).where(Provenance.id == p2.id)) + if 'p3' in locals(): + session.execute(sa.delete(Provenance).where(Provenance.id == p3.id)) + if 'p4' in locals(): + session.execute(sa.delete(Provenance).where(Provenance.id == p4.id)) + + session.commit() \ No newline at end of file diff --git a/tests/models/test_reports.py b/tests/models/test_reports.py index d395ec80..e1e33ead 100644 --- a/tests/models/test_reports.py +++ b/tests/models/test_reports.py @@ -93,48 +93,58 @@ def test_measure_runtime_memory(decam_exposure, decam_reference, pipeline_for_te t0 = time.perf_counter() - ds = p.run(decam_exposure, 'N1') - - assert p.preprocessor.has_recalculated - assert p.extractor.has_recalculated - assert p.astrometor.has_recalculated - assert p.photometor.has_recalculated - assert p.subtractor.has_recalculated - assert p.detector.has_recalculated - assert p.cutter.has_recalculated - assert p.measurer.has_recalculated - - measured_time = 0 - peak_memory = 0 - for step in ds.runtimes.keys(): # also make sure all the keys are present in both dictionaries - measured_time += ds.runtimes[step] + try: + ds = p.run(decam_exposure, 'N1') + + total_time = time.perf_counter() - t0 + + assert p.preprocessor.has_recalculated + assert p.extractor.has_recalculated + assert p.backgrounder.has_recalculated + assert p.astrometor.has_recalculated + assert p.photometor.has_recalculated + assert p.subtractor.has_recalculated + assert p.detector.has_recalculated + assert p.cutter.has_recalculated + assert p.measurer.has_recalculated + + measured_time = 0 + peak_memory = 0 + for step in ds.runtimes.keys(): # also make sure all the keys are present in both dictionaries + measured_time += ds.runtimes[step] + if parse_bool(os.getenv('SEECHANGE_TRACEMALLOC')): + peak_memory = max(peak_memory, ds.memory_usages[step]) + + print(f'total_time: {total_time:.1f}s') + print(f'measured_time: {measured_time:.1f}s') + pprint(ds.runtimes, sort_dicts=False) + assert measured_time > 0.98 * total_time # at least 99% of the time is accounted for + if parse_bool(os.getenv('SEECHANGE_TRACEMALLOC')): - peak_memory = max(peak_memory, ds.memory_usages[step]) - - total_time = time.perf_counter() - t0 - - print(f'total_time: {total_time:.1f}s') - print(f'measured_time: {measured_time:.1f}s') - pprint(ds.runtimes, sort_dicts=False) - assert measured_time > 0.99 * total_time # at least 99% of the time is accounted for - - if parse_bool(os.getenv('SEECHANGE_TRACEMALLOC')): - print(f'peak_memory: {peak_memory:.1f}MB') - pprint(ds.memory_usages, sort_dicts=False) - assert 1000.0 < peak_memory < 10000.0 # memory usage is in MB, takes between 1 and 10 GB - - with SmartSession() as session: - rep = session.scalars(sa.select(Report).where(Report.exposure_id == 
decam_exposure.id)).one() - assert rep is not None - assert rep.success - assert rep.process_runtime == ds.runtimes - assert rep.process_memory == ds.memory_usages - # 'preprocessing, extraction, subtraction, detection, cutting, measuring' - assert rep.progress_steps == ', '.join(PROCESS_OBJECTS.keys()) - assert rep.products_exist == 'image, sources, psf, wcs, zp, sub_image, detections, cutouts, measurements' - assert rep.products_committed == '' # we don't save the data store objects at any point? - assert rep.provenance.upstreams[0].id == ds.measurements[0].provenance.id - assert rep.num_prev_reports == 0 + print(f'peak_memory: {peak_memory:.1f}MB') + pprint(ds.memory_usages, sort_dicts=False) + assert 1000.0 < peak_memory < 10000.0 # memory usage is in MB, takes between 1 and 10 GB + + with SmartSession() as session: + rep = session.scalars(sa.select(Report).where(Report.exposure_id == decam_exposure.id)).one() + assert rep is not None + assert rep.success + assert rep.process_runtime == ds.runtimes + assert rep.process_memory == ds.memory_usages + # should contain: 'preprocessing, extraction, subtraction, detection, cutting, measuring' + assert rep.progress_steps == ', '.join(PROCESS_OBJECTS.keys()) + assert rep.products_exist == ('image, sources, psf, bg, wcs, zp, ' + 'sub_image, detections, cutouts, measurements') + assert rep.products_committed == 'image, sources, psf, bg, wcs, zp' # we use intermediate save + assert rep.provenance.upstreams[0].id == ds.measurements[0].provenance.id + assert rep.num_prev_reports == 0 + ds.save_and_commit(session=session) + rep.scan_datastore(ds, session=session) + assert rep.products_committed == ('image, sources, psf, bg, wcs, zp, ' + 'sub_image, detections, cutouts, measurements') + finally: + if 'ds' in locals(): + ds.delete_everything() def test_inject_warnings(decam_datastore, decam_reference, pipeline_for_tests, decam_default_calibrators): diff --git a/tests/models/test_source_list.py b/tests/models/test_source_list.py index 46bafc5c..a355edec 100644 --- a/tests/models/test_source_list.py +++ b/tests/models/test_source_list.py @@ -155,8 +155,7 @@ def test_read_sextractor( ztf_filepath_sources ): assert sources.num_sources == 112 assert sources.good.sum() == 105 assert sources.aper_rads == [ 1.0, 2.5 ] - assert sources._inf_aper_num is None - assert sources.inf_aper_num == 1 + assert sources.inf_aper_num is None assert sources.x[0] == pytest.approx( 798.24, abs=0.01 ) assert sources.y[0] == pytest.approx( 17.14, abs=0.01 ) assert sources.x[50] == pytest.approx( 899.33, abs=0.01 ) @@ -243,12 +242,12 @@ def test_calc_apercor( decam_datastore ): sources = decam_datastore.get_sources() # These numbers are when you don't use is_star at all: - assert sources.calc_aper_cor() == pytest.approx(-0.4509, abs=0.01) - assert sources.calc_aper_cor(aper_num=1) == pytest.approx(-0.177, abs=0.01) - assert sources.calc_aper_cor(inf_aper_num=7) == pytest.approx(-0.4509, abs=0.01) - assert sources.calc_aper_cor(inf_aper_num=2) == pytest.approx(-0.428, abs=0.01) - assert sources.calc_aper_cor(aper_num=2) == pytest.approx(-0.028, abs=0.01) - assert sources.calc_aper_cor(aper_num=2, inf_aper_num=7) == pytest.approx(-0.02356, abs=0.01) + assert sources.calc_aper_cor() == pytest.approx(-0.1768, abs=0.01) + assert sources.calc_aper_cor(aper_num=1) == pytest.approx(-0.0258, abs=0.01) + assert sources.calc_aper_cor(inf_aper_num=3) == pytest.approx(-0.1768, abs=0.01) + assert sources.calc_aper_cor(inf_aper_num=1) == pytest.approx(-0.1508, abs=0.01) + assert 
sources.calc_aper_cor(aper_num=2) == pytest.approx(-0.00629, abs=0.01) + assert sources.calc_aper_cor(aper_num=2, inf_aper_num=3) == pytest.approx(-0.00629, abs=0.01) # The numbers below are what you get when you use CLASS_STAR in SourceList.is_star # assert sources.calc_aper_cor() == pytest.approx( -0.457, abs=0.01 ) diff --git a/tests/pipeline/test_backgrounding.py b/tests/pipeline/test_backgrounding.py new file mode 100644 index 00000000..0d995ad5 --- /dev/null +++ b/tests/pipeline/test_backgrounding.py @@ -0,0 +1,52 @@ +import pytest +import uuid + +import numpy as np + +from improc.tools import sigma_clipping + + +def test_measuring_background(decam_processed_image, backgrounder): + backgrounder.pars.test_parameter = uuid.uuid4().hex # make sure there is no hashed value + ds = backgrounder.run(decam_processed_image) + + # check that the background is statistically similar to the image stats + mu, sig = sigma_clipping(ds.image.nandata) + assert mu == pytest.approx(ds.bg.value, rel=0.01) + assert sig == pytest.approx(ds.bg.noise, rel=0.2) # this is really a very rough estimate + + # is the background subtracted image a good representation? + mu, sig = sigma_clipping(ds.image.nandata_bgsub) # also checks that nandata_bgsub exists + assert mu == pytest.approx(0, abs=sig) + assert sig < 10 + + # most of the pixels are inside a 3 sigma range + assert np.sum(np.abs(ds.image.nandata_bgsub) < 3 * sig) > 0.9 * ds.image.nandata.size + + # this is not true of the original image + assert np.sum(np.abs(ds.image.nandata) < 3 * sig) < 0.001 * ds.image.nandata.size + + # try to do the background again, but this time using the "zero" method + backgrounder.pars.method = 'zero' + ds = backgrounder.run(ds) + assert ds.bg.method == 'zero' + assert ds.bg.value == 0 + assert ds.bg.noise == 0 + assert np.array_equal(ds.image.data, ds.image.data_bgsub) + + +def test_warnings_and_exceptions(decam_datastore, backgrounder): + backgrounder.pars.inject_warnings = 1 + + with pytest.warns(UserWarning) as record: + backgrounder.run(decam_datastore) + assert len(record) > 0 + assert any("Warning injected by pipeline parameters in process 'backgrounding'." in str(w.message) for w in record) + + backgrounder.pars.inject_warnings = 0 + backgrounder.pars.inject_exceptions = 1 + with pytest.raises(Exception) as excinfo: + ds = backgrounder.run(decam_datastore) + ds.reraise() + assert "Exception injected by pipeline parameters in process 'backgrounding'." in str(excinfo.value) + ds.read_exception() diff --git a/tests/pipeline/test_coaddition.py b/tests/pipeline/test_coaddition.py index 5cbb194d..700fddeb 100644 --- a/tests/pipeline/test_coaddition.py +++ b/tests/pipeline/test_coaddition.py @@ -21,8 +21,8 @@ from pipeline.photo_cal import PhotCalibrator -def estimate_psf_width(data, sz=15, upsampling=25): - """Extract a bright star and estimate its FWHM. +def estimate_psf_width(data, sz=7, upsampling=50, num_stars=20): + """Extract a few bright stars and estimate their median FWHM. This is a very rough-and-dirty method used only for testing. @@ -34,10 +34,13 @@ def estimate_psf_width(data, sz=15, upsampling=25): The image data. sz: int The size of the box to extract around the star. - Default is 15. + Default is 7. upsampling: int The factor by which to up-sample the PSF. - Default is 25. + Default is 50. + num_stars: int + The number of stars to use to estimate the FWHM. + Default is 20. 
Returns ------- @@ -51,49 +54,58 @@ def estimate_psf_width(data, sz=15, upsampling=25): data[:, 0:sz] = np.nan data[:, -sz:] = np.nan - psf = extract_psf_surrogate(data, sz=sz, upsampling=upsampling) - flux = [] - area = [] - radii = np.array(range(1, psf.shape[0], 2)) - x, y = np.meshgrid(np.arange(psf.shape[0]), np.arange(psf.shape[1])) - rmap = np.sqrt((x - psf.shape[0] // 2) ** 2 + (y - psf.shape[1] // 2) ** 2) - - for r in radii: - mask = (rmap <= r + 1) & (rmap > r - 1) - area.append(np.sum(mask)) - flux.append(np.sum(psf[mask])) - - flux = np.array(flux) - area = np.array(area, dtype=float) - area[area == 0] = np.nan - flux_n = flux / area # normalize by the area of the annulus - - # go over the flux difference curve and find where it drops below half the peak flux: - peak = np.nanmax(flux_n) - idx = np.where(flux_n <= peak / 2)[0][0] - - fwhm = radii[idx] * 2 / upsampling + fwhms = [] + for i in range(num_stars): + psf = extract_psf_surrogate(data, sz=sz, upsampling=upsampling) + flux = [] + area = [] + radii = np.array(range(1, psf.shape[0] // 2, 2)) + x, y = np.meshgrid(np.arange(psf.shape[0]), np.arange(psf.shape[1])) + rmap = np.sqrt((x - psf.shape[1] // 2) ** 2 + (y - psf.shape[0] // 2) ** 2) + + for r in radii: + mask = (rmap <= r + 1) & (rmap > r - 1) + area.append(np.sum(mask)) + flux.append(np.sum(psf[mask])) + + flux = np.array(flux) + area = np.array(area, dtype=float) + area[area == 0] = np.nan + flux_n = flux / area # normalize by the area of the annulus + + # go over the flux difference curve and find where it drops below half the peak flux: + peak = np.nanmax(flux_n) + idx = np.where(flux_n <= peak / 2)[0][0] + + fwhm = radii[idx] * 2 / upsampling + fwhms.append(fwhm) + + fwhm = np.nanmedian(fwhms) + print(f'fwhm median= {fwhm}, fwhm_err= {np.std(fwhms)}') return fwhm -def extract_psf_surrogate(data, sz=15, upsampling=25): +def extract_psf_surrogate(data, sz=7, upsampling=50): """Extract a rough estimate for the PSF from the brightest (non-flagged) star in the image. This is a very rough-and-dirty method used only for testing. Assumes the data array has NaNs at all masked pixel locations. + Will mask the area of the chosen star so that the same array can be + re-used to find progressively fainter stars. + Parameters ---------- data: ndarray The image data. sz: int The size of the box to extract around the star. - Default is 15. + Default is 7. upsampling: int The factor by which to up-sample the PSF. - Default is 25. + Default is 50. 
Returns ------- @@ -110,7 +122,8 @@ def extract_psf_surrogate(data, sz=15, upsampling=25): edge_y1 = max(0, y - sz) edge_y2 = min(data.shape[0], y + sz) - psf = data[edge_y1:edge_y2, edge_x1:edge_x2] + psf = data[edge_y1:edge_y2, edge_x1:edge_x2].copy() + data[edge_y1:edge_y2, edge_x1:edge_x2] = np.nan # can re-use this array to find other stars # up-sample the PSF by the given factor: psf = ifft2(fftshift(np.pad(fftshift(fft2(psf)), sz*upsampling))).real @@ -260,20 +273,19 @@ def test_zogy_vs_naive(ptf_aligned_images, coadder): # get the FWHM estimate for the regular images and for the coadd fwhms = [] for im in ptf_aligned_images: - im_nan = im.data.copy() - im_nan[im.flags > 0] = np.nan - fwhms.append(estimate_psf_width(im_nan)) + # choose an area in the middle of the image + fwhms.append(estimate_psf_width(im.nandata[1800:2600, 600:1400])) fwhms = np.array(fwhms) zogy_im_nans = zogy_im.copy() zogy_im_nans[zogy_fl > 0] = np.nan - zogy_fwhm = estimate_psf_width(zogy_im_nans) + zogy_fwhm = estimate_psf_width(zogy_im_nans[1800:2600, 600:1400]) naive_im_nans = naive_im.copy() naive_im_nans[naive_fl > 0] = np.nan - naive_fwhm = estimate_psf_width(naive_im_nans) + naive_fwhm = estimate_psf_width(naive_im_nans[1800:2600, 600:1400]) - assert all(zogy_fwhm <= fwhms) # the ZOGY PSF should be narrower than original PSFs + assert zogy_fwhm < np.mean(fwhms) # the ZOGY PSF should be narrower than original PSFs assert zogy_fwhm < naive_fwhm @@ -449,8 +461,8 @@ def test_coaddition_pipeline_outputs(ptf_reference_images, ptf_aligned_images): # check that the ZOGY PSF width is similar to the PSFex result assert np.max(coadd_image.zogy_psf) == pytest.approx(np.max(coadd_image.psf.get_clip()), abs=0.01) - zogy_fwhm = estimate_psf_width(coadd_image.zogy_psf) - psfex_fwhm = estimate_psf_width(np.pad(coadd_image.psf.get_clip(), 20)) # pad so extract_psf_surrogate works + zogy_fwhm = estimate_psf_width(coadd_image.zogy_psf, num_stars=1) + psfex_fwhm = estimate_psf_width(np.pad(coadd_image.psf.get_clip(), 20), num_stars=1) # pad so extract_psf_surrogate works assert zogy_fwhm == pytest.approx(psfex_fwhm, rel=0.1) # check that the S/N is consistent with a coadd @@ -466,8 +478,8 @@ def test_coaddition_pipeline_outputs(ptf_reference_images, ptf_aligned_images): # zogy background noise is normalized by construction assert bkg_zogy == pytest.approx(1.0, abs=0.1) - # S/N should be sqrt(N) better # TODO: why is the zogy S/N 15% better than expected?? - assert snr_zogy == pytest.approx(mean_snr * np.sqrt(len(ptf_reference_images)), rel=0.2) + # S/N should be sqrt(N) better # TODO: why is the zogy S/N 20% better than expected?? + assert snr_zogy == pytest.approx(mean_snr * np.sqrt(len(ptf_reference_images)), rel=0.5) finally: if 'coadd_image' in locals(): diff --git a/tests/pipeline/test_extraction.py b/tests/pipeline/test_extraction.py index bfd35413..9bd472c0 100644 --- a/tests/pipeline/test_extraction.py +++ b/tests/pipeline/test_extraction.py @@ -137,13 +137,28 @@ def test_sextractor_extract_once( decam_datastore, extractor ): assert sourcelist.num_sources == 5611 assert len(sourcelist.data) == sourcelist.num_sources assert sourcelist.aper_rads == [ 5. ] - assert sourcelist._inf_aper_num is None - assert sourcelist.inf_aper_num == 0 assert sourcelist.info['SEXAPED1'] == 10.0 assert sourcelist.info['SEXAPED2'] == 0. 
assert sourcelist.info['SEXBKGND'] == pytest.approx( 179.8, abs=0.1 ) + snr = sourcelist.apfluxadu()[0] / sourcelist.apfluxadu()[1] + # print( + # f'sourcelist.x.min()= {sourcelist.x.min()}', + # f'sourcelist.x.max()= {sourcelist.x.max()}', + # f'sourcelist.y.min()= {sourcelist.y.min()}', + # f'sourcelist.y.max()= {sourcelist.y.max()}', + # f'sourcelist.errx.min()= {sourcelist.errx.min()}', + # f'sourcelist.errx.max()= {sourcelist.errx.max()}', + # f'sourcelist.erry.min()= {sourcelist.erry.min()}', + # f'sourcelist.erry.max()= {sourcelist.erry.max()}', + # f'sourcelist.apfluxadu()[0].min()= {sourcelist.apfluxadu()[0].min()}', + # f'sourcelist.apfluxadu()[0].max()= {sourcelist.apfluxadu()[0].max()}', + # f'snr.min()= {snr.min()}', + # f'snr.max()= {snr.max()}', + # f'snr.mean()= {snr.mean()}', + # f'snr.std()= {snr.std()}' + # ) assert sourcelist.x.min() == pytest.approx( 16.0, abs=0.1 ) assert sourcelist.x.max() == pytest.approx( 2039.6, abs=0.1 ) assert sourcelist.y.min() == pytest.approx( 16.264, abs=0.1 ) @@ -156,24 +171,32 @@ def test_sextractor_extract_once( decam_datastore, extractor ): assert ( np.sqrt( sourcelist.vary ) == sourcelist.erry ).all() assert sourcelist.apfluxadu()[0].min() == pytest.approx( -656.8731, rel=1e-5 ) assert sourcelist.apfluxadu()[0].max() == pytest.approx( 2850920.0, rel=1e-5 ) - snr = sourcelist.apfluxadu()[0] / sourcelist.apfluxadu()[1] assert snr.min() == pytest.approx( -9.91, abs=0.1 ) assert snr.max() == pytest.approx( 2348.2166, abs=1. ) assert snr.mean() == pytest.approx( 146.80, abs=0.1 ) assert snr.std() == pytest.approx( 285.4, abs=1. ) # Test multiple apertures - sourcelist, _, _ = extractor._run_sextractor_once( decam_datastore.image, apers=[2, 5] ) + sourcelist, _, _ = extractor._run_sextractor_once( decam_datastore.image, apers=[ 2., 5. ]) assert sourcelist.num_sources == 5611 # It *finds* the same things assert len(sourcelist.data) == sourcelist.num_sources assert sourcelist.aper_rads == [ 2., 5. 
] - assert sourcelist._inf_aper_num is None - assert sourcelist.inf_aper_num == 1 assert sourcelist.info['SEXAPED1'] == 4.0 assert sourcelist.info['SEXAPED2'] == 10.0 assert sourcelist.info['SEXBKGND'] == pytest.approx( 179.8, abs=0.1 ) + + # print( + # f'sourcelist.x.min()= {sourcelist.x.min()}', + # f'sourcelist.x.max()= {sourcelist.x.max()}', + # f'sourcelist.y.min()= {sourcelist.y.min()}', + # f'sourcelist.y.max()= {sourcelist.y.max()}', + # f'sourcelist.apfluxadu(apnum=1)[0].min()= {sourcelist.apfluxadu(apnum=1)[0].min()}', + # f'sourcelist.apfluxadu(apnum=1)[0].max()= {sourcelist.apfluxadu(apnum=1)[0].max()}', + # f'sourcelist.apfluxadu(apnum=0)[0].min()= {sourcelist.apfluxadu(apnum=0)[0].min()}', + # f'sourcelist.apfluxadu(apnum=0)[0].max()= {sourcelist.apfluxadu(apnum=0)[0].max()}' + # ) assert sourcelist.x.min() == pytest.approx( 16.0, abs=0.1 ) assert sourcelist.x.max() == pytest.approx( 2039.6, abs=0.1 ) assert sourcelist.y.min() == pytest.approx( 16.264, abs=0.1 ) @@ -209,7 +232,7 @@ def test_run_psfex( decam_datastore, extractor ): assert psf._header['CHI2'] == pytest.approx( 0.9, abs=0.1 ) bio = io.BytesIO( psf._info.encode( 'utf-8' ) ) psfstats = votable.parse( bio ).get_table_by_index(1) - assert psfstats.array['FWHM_FromFluxRadius_Max'] == pytest.approx( 4.31, abs=0.01 ) + assert psfstats.array['FWHM_FromFluxRadius_Max'] == pytest.approx( 4.33, abs=0.01 ) assert not tmppsffile.exists() assert not tmppsfxmlfile.exists() @@ -253,29 +276,28 @@ def test_extract_sources_sextractor( decam_datastore, extractor, provenance_base if use: ofp.write( f"image;circle({x+1},{y+1},6) # color=blue width=2\n" ) - assert sources.num_sources == 5500 + assert sources.num_sources > 5000 assert sources.num_sources == len(sources.data) - assert sources.aper_rads == pytest.approx( [ 2.885, 4.286, 8.572, 12.858, - 17.145, 21.431, 30.003, 42.862 ], abs=0.01 ) - assert sources._inf_aper_num == 5 - assert sources.inf_aper_num == 5 + expected_radii = np.array([1.0, 2.0, 3.0, 5.0]) * psf.fwhm_pixels + assert sources.aper_rads == pytest.approx(expected_radii, abs=0.01 ) + assert sources.inf_aper_num == -1 assert psf.fwhm_pixels == pytest.approx( 4.286, abs=0.01 ) assert psf.fwhm_pixels == pytest.approx( psf.header['PSF_FWHM'], rel=1e-5 ) assert psf.data.shape == ( 6, 25, 25 ) assert psf.image_id == ds.image.id - assert sources.apfluxadu()[0].min() == pytest.approx( 200.34559, rel=1e-5 ) - assert sources.apfluxadu()[0].max() == pytest.approx( 1105999.625, rel=1e-5 ) - assert sources.apfluxadu()[0].mean() == pytest.approx( 36779.797 , rel=1e-5 ) - assert sources.apfluxadu()[0].std() == pytest.approx( 121950.04 , rel=1e-5 ) + assert sources.apfluxadu()[0].min() == pytest.approx( 275, rel=0.01 ) + assert sources.apfluxadu()[0].max() == pytest.approx( 2230000, rel=0.01 ) + assert sources.apfluxadu()[0].mean() == pytest.approx( 54000, rel=0.01 ) + assert sources.apfluxadu()[0].std() == pytest.approx( 196000, rel=0.01 ) - assert sources.good.sum() == 3638 + assert sources.good.sum() == pytest.approx(3000, rel=0.01) # This value is what you get using the SPREAD_MODEL parameter # assert sources.is_star.sum() == 4870 # assert ( sources.good & sources.is_star ).sum() == 3593 # This is what you get with CLASS_STAR - assert sources.is_star.sum() == 337 - assert ( sources.good & sources.is_star ).sum() == 61 + assert sources.is_star.sum() == pytest.approx(325, rel=0.01) + assert ( sources.good & sources.is_star ).sum() == pytest.approx(70, abs=5) try: # make sure saving the PSF and source list goes as expected, and 
cleanup at the end psf.provenance = provenance_base @@ -288,95 +310,12 @@ def test_extract_sources_sextractor( decam_datastore, extractor, provenance_base assert re.match(r'\d{3}/c4d_\d{8}_\d{6}_N1_g_Sci_.{6}.sources_.{6}.fits', sources.filepath) assert os.path.isfile(os.path.join(data_dir, sources.filepath)) + # TODO: add background object here + finally: # cleanup psf.delete_from_disk_and_database() sources.delete_from_disk_and_database() -# TODO : add tests that handle different combinations -# of measure_psf and psf being passed to the Detector constructor - - -# TODO: is this test really the same as the one above? -def test_run_detection_sextractor( decam_datastore, extractor ): - ds = decam_datastore - - # det = Detector( method='sextractor', measure_psf=True, threshold=5.0 ) - extractor.pars.method = 'sextractor' - extractor.measure_psf = True - extractor.pars.threshold = 5.0 - extractor.pars.test_parameter = uuid.uuid4().hex - ds = extractor.run( ds ) - - assert extractor.has_recalculated - assert ds.sources.num_sources == 5500 - assert ds.sources.num_sources == len(ds.sources.data) - assert ds.sources.aper_rads == pytest.approx( [ 2.88551706, 4.28627014, 8.57254028, 12.85881042, 17.14508057, - 21.43135071, 30.00389099, 42.86270142], abs=0.01 ) - assert ds.sources._inf_aper_num == 5 - assert ds.sources.inf_aper_num == 5 - assert ds.psf.fwhm_pixels == pytest.approx( 4.286, abs=0.01 ) - assert ds.psf.fwhm_pixels == pytest.approx( ds.psf.header['PSF_FWHM'], rel=1e-5 ) - assert ds.psf.data.shape == ( 6, 25, 25 ) - assert ds.psf.image_id == ds.image.id - - assert ds.sources.apfluxadu()[0].min() == pytest.approx( 200.3456, rel=1e-5 ) - assert ds.sources.apfluxadu()[0].max() == pytest.approx( 1105999.6, rel=1e-5 ) - assert ds.sources.apfluxadu()[0].mean() == pytest.approx( 36779.797, rel=1e-5 ) - assert ds.sources.apfluxadu()[0].std() == pytest.approx( 121950.04 , rel=1e-5 ) - - assert ds.sources.good.sum() == 3638 - # This value is what you get using the SPREAD_MODEL parameter - # assert ds.sources.is_star.sum() == 4870 - # assert ( ds.sources.good & ds.sources.is_star ).sum() == 3593 - # This value is what you get using the CLASS_STAR parameter - assert ds.sources.is_star.sum() == 337 - assert ( ds.sources.good & ds.sources.is_star ).sum() == 61 - - # TODO : actually think about these psf fluxes and how they compare - # to the aperture fluxes (esp. the large-aperture fluxes). Try to - # understand what SExtractor psf weighted photometry actually - # does.... Preliminary investigations suggest that something may be - # wrong. 
- - assert ds.sources.psffluxadu()[0].min() == 0.0 - assert ds.sources.psffluxadu()[0].max() == pytest.approx( 1725000.0, rel=1e-2 ) - assert ds.sources.psffluxadu()[0].mean() == pytest.approx( 48000.0, rel=1e-2 ) - assert ds.sources.psffluxadu()[0].std() == pytest.approx( 170000.0, rel=1e-2 ) - - assert ds.sources.provenance is not None - assert ds.sources.provenance == ds.psf.provenance - assert ds.sources.provenance.process == 'extraction' - - assert ds.image.bkg_mean_estimate == pytest.approx( 179.82, abs=0.1 ) - assert ds.image.bkg_rms_estimate == pytest.approx( 7.533, abs=0.01 ) - - from sqlalchemy.exc import IntegrityError - - try: - ds.save_and_commit() - - # Make sure all the files exist - archive = get_archive_object() - imdir = pathlib.Path( FileOnDiskMixin.local_path ) - relpaths = [] - relpaths += [ds.image.filepath + ext for ext in ds.image.filepath_extensions] - relpaths += [ds.sources.filepath] - relpaths += [ds.psf.filepath + ext for ext in ds.psf.filepath_extensions] - for relp in relpaths: - assert ( imdir / relp ).is_file() - assert archive.get_info( relp ) is not None - - # Make sure the bkg fields in the image database table aren't empty - - with SmartSession() as sess: - imgs = sess.query( Image ).filter( Image.id == ds.image.id ).all() - assert len(imgs) == 1 - assert imgs[0].bkg_mean_estimate == pytest.approx( 179.82, abs=0.1 ) - assert imgs[0].bkg_rms_estimate == pytest.approx( 7.533, abs=0.01 ) - - finally: - ds.delete_everything() - def test_warnings_and_exceptions(decam_datastore, extractor): extractor.pars.inject_warnings = 1 diff --git a/tests/pipeline/test_measuring.py b/tests/pipeline/test_measuring.py index 6d529548..24457dc4 100644 --- a/tests/pipeline/test_measuring.py +++ b/tests/pipeline/test_measuring.py @@ -103,8 +103,8 @@ def test_measuring(measurer, decam_cutouts, decam_default_calibrators): assert np.allclose(m.flux_apertures, 100) # aperture is irrelevant for delta function assert m.flux_psf > 150 # flux is more focused than the PSF, so it will bias the flux to be higher than 100 - assert m.background == 0 - assert m.background_err == 0 + assert m.bkg_mean == 0 + assert m.bkg_std == 0 for i in range(3): # check only the last apertures, that are smaller than cutout square assert m.area_apertures[i] == pytest.approx(np.pi * (m.aper_radii[i] + 0.5) ** 2, rel=0.1) @@ -117,8 +117,8 @@ def test_measuring(measurer, decam_cutouts, decam_default_calibrators): assert np.allclose(m.flux_apertures, 200) assert m.flux_psf > 300 # flux is more focused than the PSF, so it will bias the flux to be higher than 100 - assert m.background == 0 - assert m.background_err == 0 + assert m.bkg_mean == 0 + assert m.bkg_std == 0 m = ds.all_measurements[2] # gaussian assert m.disqualifier_scores['negatives'] < 1.0 @@ -127,13 +127,12 @@ def test_measuring(measurer, decam_cutouts, decam_default_calibrators): assert m.disqualifier_scores['filter bank'] == 0 assert m.get_filter_description() == f'PSF match (FWHM= 1.00 x {fwhm:.2f})' - assert m.flux_apertures[0] < 900 - assert m.flux_apertures[1] < 1000 - for i in range(2, len(m.flux_apertures)): + assert m.flux_apertures[0] < 1000 + for i in range(1, len(m.flux_apertures)): assert m.flux_apertures[i] == pytest.approx(1000, rel=0.1) assert m.flux_psf == pytest.approx(1000, rel=0.1) - assert m.background == pytest.approx(0, abs=0.01) - assert m.background_err == pytest.approx(0, abs=0.01) + assert m.bkg_mean == pytest.approx(0, abs=0.01) + assert m.bkg_std == pytest.approx(0, abs=0.01) # TODO: add test for PSF flux when it is 
implemented @@ -143,13 +142,12 @@ def test_measuring(measurer, decam_cutouts, decam_default_calibrators): assert m.disqualifier_scores['offsets'] == pytest.approx(np.sqrt(2 ** 2 + 3 ** 2), abs=1.0) assert m.disqualifier_scores['filter bank'] == 0 - assert m.flux_apertures[0] < 450 - assert m.flux_apertures[1] < 500 - for i in range(2, len(m.flux_apertures)): + assert m.flux_apertures[0] < 500 + for i in range(1, len(m.flux_apertures)): assert m.flux_apertures[i] == pytest.approx(500, rel=0.1) assert m.flux_psf == pytest.approx(500, rel=0.1) - assert m.background == pytest.approx(0, abs=0.01) - assert m.background_err == pytest.approx(0, abs=0.01) + assert m.bkg_mean == pytest.approx(0, abs=0.01) + assert m.bkg_std == pytest.approx(0, abs=0.01) m = ds.all_measurements[4] # dipole assert m.disqualifier_scores['negatives'] == pytest.approx(1.0, abs=0.1) @@ -160,9 +158,8 @@ def test_measuring(measurer, decam_cutouts, decam_default_calibrators): # the dipole's large offsets will short-circuit the iterative repositioning of the aperture (should be flagged!) assert all(np.isnan(m.flux_apertures)) assert all(np.isnan(m.area_apertures)) - assert m.background == 0 - assert m.background_err == 0 - assert m.background_err == 0 + assert m.bkg_std == 0 + assert m.bkg_std == 0 m = ds.all_measurements[5] # shifted gaussian with noise assert m.disqualifier_scores['negatives'] < 1.0 @@ -171,15 +168,14 @@ def test_measuring(measurer, decam_cutouts, decam_default_calibrators): assert m.disqualifier_scores['filter bank'] == 0 assert m.get_filter_description() == f'PSF match (FWHM= 1.00 x {fwhm:.2f})' - assert m.flux_apertures[0] < 450 - assert m.flux_apertures[1] < 500 - for i in range(2, len(m.flux_apertures)): + assert m.flux_apertures[0] < 500 + for i in range(1, len(m.flux_apertures)): assert m.flux_apertures[i] == pytest.approx(500, rel=0.1) m = ds.all_measurements[6] # dipole with noise assert m.disqualifier_scores['negatives'] == pytest.approx(1.0, abs=0.2) assert m.disqualifier_scores['bad pixels'] == 0 - assert m.disqualifier_scores['offsets'] > 10 + assert m.disqualifier_scores['offsets'] > 1 assert m.disqualifier_scores['filter bank'] > 0 m = ds.all_measurements[7] # delta function with bad pixel @@ -209,14 +205,13 @@ def test_measuring(measurer, decam_cutouts, decam_default_calibrators): assert m.disqualifier_scores['filter bank'] == 2 assert m.get_filter_description() == f'PSF mismatch (FWHM= 2.00 x {fwhm:.2f})' - assert m.flux_apertures[0] < 400 - assert m.flux_apertures[1] < 600 - for i in range(2, len(m.flux_apertures)): + assert m.flux_apertures[0] < 600 + for i in range(1, len(m.flux_apertures)): assert m.flux_apertures[i] == pytest.approx(1000, rel=1) assert m.flux_psf < 500 # flux is more spread out than the PSF, so it will bias the flux to be lower - assert m.background == pytest.approx(0, abs=0.2) - assert m.background_err == pytest.approx(1.0, abs=0.2) + assert m.bkg_mean == pytest.approx(0, abs=0.2) + assert m.bkg_std == pytest.approx(1.0, abs=0.2) m = ds.all_measurements[11] # streak assert m.disqualifier_scores['negatives'] < 0.5 @@ -224,8 +219,8 @@ def test_measuring(measurer, decam_cutouts, decam_default_calibrators): assert m.disqualifier_scores['offsets'] < 0.7 assert m.disqualifier_scores['filter bank'] == 28 assert m.get_filter_description() == 'Streaked (angle= 25.0 deg)' - assert m.background < 0.5 - assert m.background_err < 3.0 + assert m.bkg_mean < 0.5 + assert m.bkg_std < 3.0 m = ds.all_measurements[12] # regular cutout with a bad flag assert 
diff --git a/tests/pipeline/test_photo_cal.py b/tests/pipeline/test_photo_cal.py
index ceb5f8eb..ee274a61 100644
--- a/tests/pipeline/test_photo_cal.py
+++ b/tests/pipeline/test_photo_cal.py
@@ -79,4 +79,4 @@ def test_warnings_and_exceptions(decam_datastore, photometor):
ds = photometor.run(decam_datastore)
ds.reraise()
assert "Exception injected by pipeline parameters in process 'photo_cal'." in str(excinfo.value)
- ds.read_exception()
\ No newline at end of file
+ ds.read_exception()
diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py
index df045e5b..4ab0bfb3 100644
--- a/tests/pipeline/test_pipeline.py
+++ b/tests/pipeline/test_pipeline.py
@@ -170,7 +170,7 @@ def test_parameters( test_config ):
'subtraction': { 'method': 'override' },
'detection': { 'threshold': 3.14 },
'cutting': { 'cutout_size': 666 },
- 'measuring': { 'chosen_aperture': 1 }
+ 'measuring': { 'outlier_sigma': 3.5 }
}
pipelinemodule = {
'preprocessing': 'preprocessor',
@@ -209,6 +209,7 @@ def test_data_flow(decam_exposure, decam_reference, decam_default_calibrators, a
sec_id = ref.section_id
try: # cleanup the file at the end
p = Pipeline()
+ p.pars.save_before_subtraction = False
assert p.extractor.pars.threshold != 3.14
assert p.detector.pars.threshold != 3.14
@@ -230,8 +231,8 @@ def test_data_flow(decam_exposure, decam_reference, decam_default_calibrators, a
check_datastore_and_database_have_everything(exposure.id, sec_id, ref.image.id, session, ds)
- # feed the pipeline the same data, but missing the upstream data. TODO: add cutouts and measurements
- attributes = ['image', 'sources', 'wcs', 'zp', 'sub_image', 'detections']
+ # feed the pipeline the same data, but missing the upstream data.
+ attributes = ['image', 'sources', 'sub_image', 'detections', 'cutouts', 'measurements']
for i in range(len(attributes)):
for j in range(i + 1):
@@ -286,6 +287,7 @@ def test_bitflag_propagation(decam_exposure, decam_reference, decam_default_cali
try: # cleanup the file at the end
p = Pipeline()
+ p.pars.save_before_subtraction = False
exposure.badness = 'banding' # add a bitflag to check for propagation
# first run the pipeline and check for basic propagation of the single bitflag
@@ -295,6 +297,7 @@ def test_bitflag_propagation(decam_exposure, decam_reference, decam_default_cali
assert ds.image._upstream_bitflag == 2
assert ds.sources._upstream_bitflag == 2
assert ds.psf._upstream_bitflag == 2
+ assert ds.bg._upstream_bitflag == 2
assert ds.wcs._upstream_bitflag == 2
assert ds.zp._upstream_bitflag == 2
assert ds.sub_image._upstream_bitflag == 2
@@ -305,6 +308,7 @@ def test_bitflag_propagation(decam_exposure, decam_reference, decam_default_cali
# test part 2: Add a second bitflag partway through and check it propagates to downstreams
# delete downstreams of ds.sources
+ ds.bg = None
ds.wcs = None
ds.zp = None
ds.sub_image = None
@@ -406,16 +410,20 @@ def test_get_upstreams_and_downstreams(decam_exposure, decam_reference, decam_de
assert [upstream.id for upstream in ds.wcs.get_upstreams(session)] == [ds.sources.id]
assert [upstream.id for upstream in ds.psf.get_upstreams(session)] == [ds.image.id]
assert [upstream.id for upstream in ds.zp.get_upstreams(session)] == [ds.sources.id]
- assert [upstream.id for upstream in ds.sub_image.get_upstreams(session)] == [ref.image.id,
- ref.image.sources.id,
- ref.image.psf.id,
- ref.image.wcs.id,
- ref.image.zp.id,
- ds.image.id,
- ds.sources.id,
- ds.psf.id,
- ds.wcs.id,
- ds.zp.id]
+ assert set([upstream.id for upstream in ds.sub_image.get_upstreams(session)]) == set([
+ ref.image.id,
+ ref.image.sources.id,
+ ref.image.psf.id,
+ ref.image.bg.id,
+ ref.image.wcs.id,
+ ref.image.zp.id,
+ ds.image.id,
+ ds.sources.id,
+ ds.psf.id,
+ ds.bg.id,
+ ds.wcs.id,
+ ds.zp.id,
+ ])
assert [upstream.id for upstream in ds.detections.get_upstreams(session)] == [ds.sub_image.id]
for cutout in ds.cutouts:
assert [upstream.id for upstream in cutout.get_upstreams(session)] == [ds.detections.id]
@@ -428,11 +436,14 @@ def test_get_upstreams_and_downstreams(decam_exposure, decam_reference, decam_de
# test get_downstreams
assert [downstream.id for downstream in ds.exposure.get_downstreams(session)] == [ds.image.id]
- assert set([downstream.id for downstream in ds.image.get_downstreams(session)]) == set([ds.psf.id,
- ds.sources.id,
- ds.wcs.id,
- ds.zp.id,
- ds.sub_image.id])
+ assert set([downstream.id for downstream in ds.image.get_downstreams(session)]) == set([
+ ds.sources.id,
+ ds.psf.id,
+ ds.bg.id,
+ ds.wcs.id,
+ ds.zp.id,
+ ds.sub_image.id
+ ])
assert [downstream.id for downstream in ds.sources.get_downstreams(session)] == [ds.sub_image.id]
assert [downstream.id for downstream in ds.psf.get_downstreams(session)] == [ds.sub_image.id]
assert [downstream.id for downstream in ds.wcs.get_downstreams(session)] == [ds.sub_image.id]
@@ -539,6 +550,7 @@ def test_inject_warnings_errors(decam_datastore, decam_reference, pipeline_for_t
obj_to_process_name = {
'preprocessor': 'preprocessing',
'extractor': 'detection',
+ 'backgrounder': 'backgrounding',
'astrometor': 'astro_cal',
'photometor': 'photo_cal',
'subtractor': 'subtraction',
diff --git a/tests/pipeline/test_subtraction.py b/tests/pipeline/test_subtraction.py
index 9ff3faf3..5998802c 100644
--- a/tests/pipeline/test_subtraction.py
+++ b/tests/pipeline/test_subtraction.py
@@ -72,10 +72,8 @@ def test_subtraction_ptf_zogy(ptf_ref, ptf_supernova_images, subtractor):
S[ds.sub_image.flags > 0] = np.nan
mu, sigma = sigma_clipping(S)
- # assert abs(mu) < 0.01 # the mean should be close to zero
- assert abs(mu) < 0.2 # this is not working perfectly, we need to improve the background removal!
- # assert abs(sigma - 1) < 0.1 # the standard deviation should be close to 1
- assert abs(sigma - 1) < 1 # the standard deviation may be also affected by background...
+ assert abs(mu) < 0.1 # the mean should be close to zero
+ assert abs(sigma - 1) < 0.1 # the standard deviation should be close to 1
def test_warnings_and_exceptions(decam_datastore, decam_reference, subtractor, decam_default_calibrators):
diff --git a/util/util.py b/util/util.py
index 8c68ccac..37051f0a 100644
--- a/util/util.py
+++ b/util/util.py
@@ -18,6 +18,7 @@
from models.base import SmartSession, safe_mkdir
+
def ensure_file_does_not_exist( filepath, delete=False ):
"""Check if a file exists. Delete it, or raise an exception, if it does.
@@ -348,7 +349,7 @@ def save_fits_image_file(filename, data, header, extname=None, overwrite=True, s
The path to the file saved (or written to)
"""
-
+ filename = str(filename) # handle pathlib.Path objects
hdu = fits.ImageHDU( data, name=extname ) if single_file else fits.PrimaryHDU( data )
if isinstance( header, fits.Header ):