Skip to content

Commit

Permalink
Background model (#308)
Browse files Browse the repository at this point in the history
Add Background as an Image data product, add backgrounding and Backgrounder as a configurable pipeline step.
  • Loading branch information
guynir42 authored Jun 21, 2024
1 parent 9d7d977 commit 8b4a87c
Show file tree
Hide file tree
Showing 51 changed files with 2,133 additions and 548 deletions.
86 changes: 86 additions & 0 deletions alembic/versions/2024_06_10_1132-a375526c8260_background_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""background table
Revision ID: a375526c8260
Revises: a7dde2327dde
Create Date: 2024-06-10 11:32:39.717922
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = 'a375526c8260'
down_revision = 'a7dde2327dde'
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Apply the 'background table' migration.

    Creates the ``backgrounds`` table (one row per image+provenance pair,
    holding a scalar background ``value``/``noise`` plus file-storage columns
    for a background map), renames the aperture-number columns on
    ``source_lists``, and replaces the inline background columns on
    ``measurements`` with the new ``bkg_*`` columns.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        'backgrounds',
        # _format/_method are stored as small ints; presumably mapped to
        # enums in the model layer — confirm against models/background.py.
        sa.Column('_format', sa.SMALLINT(), nullable=False),
        sa.Column('_method', sa.SMALLINT(), nullable=False),
        sa.Column('image_id', sa.BigInteger(), nullable=False),
        sa.Column('value', sa.Float(), nullable=False),
        sa.Column('noise', sa.Float(), nullable=False),
        sa.Column('provenance_id', sa.String(), nullable=False),
        sa.Column('created_at', sa.DateTime(), nullable=False),
        sa.Column('modified', sa.DateTime(), nullable=False),
        sa.Column('id', sa.BigInteger(), autoincrement=True, nullable=False),
        # File-on-disk mixin columns: per-extension paths and checksums.
        sa.Column('filepath_extensions', postgresql.ARRAY(sa.Text(), zero_indexes=True), nullable=True),
        sa.Column('md5sum', sa.UUID(), nullable=True),
        sa.Column('md5sum_extensions', postgresql.ARRAY(sa.UUID(), zero_indexes=True), nullable=True),
        sa.Column('filepath', sa.Text(), nullable=False),
        sa.Column('_bitflag', sa.BIGINT(), nullable=False),
        sa.Column('description', sa.Text(), nullable=True),
        sa.Column('_upstream_bitflag', sa.BIGINT(), nullable=False),
        # Deleting an image or provenance cascades to its backgrounds.
        sa.ForeignKeyConstraint(['image_id'], ['images.id'], name='backgrounds_image_id_fkey', ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['provenance_id'], ['provenances.id'], name='backgrounds_provenance_id_fkey', ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id')
    )
    # Unique pair: at most one background per (image, provenance).
    op.create_index('backgrounds_image_id_provenance_index', 'backgrounds', ['image_id', 'provenance_id'], unique=True)
    op.create_index(op.f('ix_backgrounds__bitflag'), 'backgrounds', ['_bitflag'], unique=False)
    op.create_index(op.f('ix_backgrounds__upstream_bitflag'), 'backgrounds', ['_upstream_bitflag'], unique=False)
    op.create_index(op.f('ix_backgrounds_created_at'), 'backgrounds', ['created_at'], unique=False)
    op.create_index(op.f('ix_backgrounds_filepath'), 'backgrounds', ['filepath'], unique=True)
    op.create_index(op.f('ix_backgrounds_id'), 'backgrounds', ['id'], unique=False)
    op.create_index(op.f('ix_backgrounds_image_id'), 'backgrounds', ['image_id'], unique=False)
    op.create_index(op.f('ix_backgrounds_noise'), 'backgrounds', ['noise'], unique=False)
    op.create_index(op.f('ix_backgrounds_provenance_id'), 'backgrounds', ['provenance_id'], unique=False)
    op.create_index(op.f('ix_backgrounds_value'), 'backgrounds', ['value'], unique=False)

    # source_lists: replace the private _inf_aper_num with public
    # inf_aper_num/best_aper_num columns.
    op.add_column('source_lists', sa.Column('inf_aper_num', sa.SMALLINT(), nullable=True))
    op.add_column('source_lists', sa.Column('best_aper_num', sa.SMALLINT(), nullable=True))
    op.drop_column('source_lists', '_inf_aper_num')

    # measurements: rename background/background_err to bkg_mean/bkg_std and
    # add bkg_pix.
    # NOTE(review): adding NOT NULL columns without a server_default fails on
    # a non-empty measurements table — confirm the table is empty at this
    # revision, or add server_default here.
    op.add_column('measurements', sa.Column('bkg_mean', sa.REAL(), nullable=False))
    op.add_column('measurements', sa.Column('bkg_std', sa.REAL(), nullable=False))
    op.add_column('measurements', sa.Column('bkg_pix', sa.REAL(), nullable=False))
    op.drop_column('measurements', 'background')
    op.drop_column('measurements', 'background_err')
    # ### end Alembic commands ###


def downgrade() -> None:
    """Revert the 'background table' migration.

    Mirrors :func:`upgrade` in strict reverse order: restores the old
    ``measurements`` and ``source_lists`` columns, then drops the
    ``backgrounds`` indexes and table.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # NOTE(review): as in upgrade(), re-adding NOT NULL columns without a
    # server_default fails if measurements has rows — confirm.
    op.add_column('measurements', sa.Column('background_err', sa.REAL(), autoincrement=False, nullable=False))
    op.add_column('measurements', sa.Column('background', sa.REAL(), autoincrement=False, nullable=False))
    op.drop_column('measurements', 'bkg_pix')
    op.drop_column('measurements', 'bkg_std')
    op.drop_column('measurements', 'bkg_mean')

    op.add_column('source_lists', sa.Column('_inf_aper_num', sa.SMALLINT(), autoincrement=False, nullable=True))
    op.drop_column('source_lists', 'best_aper_num')
    op.drop_column('source_lists', 'inf_aper_num')

    # Drop indexes before the table they belong to.
    op.drop_index(op.f('ix_backgrounds_value'), table_name='backgrounds')
    op.drop_index(op.f('ix_backgrounds_provenance_id'), table_name='backgrounds')
    op.drop_index(op.f('ix_backgrounds_noise'), table_name='backgrounds')
    op.drop_index(op.f('ix_backgrounds_image_id'), table_name='backgrounds')
    op.drop_index(op.f('ix_backgrounds_id'), table_name='backgrounds')
    op.drop_index(op.f('ix_backgrounds_filepath'), table_name='backgrounds')
    op.drop_index(op.f('ix_backgrounds_created_at'), table_name='backgrounds')
    op.drop_index(op.f('ix_backgrounds__upstream_bitflag'), table_name='backgrounds')
    op.drop_index(op.f('ix_backgrounds__bitflag'), table_name='backgrounds')
    op.drop_index('backgrounds_image_id_provenance_index', table_name='backgrounds')
    op.drop_table('backgrounds')
    # ### end Alembic commands ###
19 changes: 15 additions & 4 deletions default_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,18 +83,28 @@ preprocessing:

extraction:
sources:
method: sextractor
measure_psf: true
apertures: [1.0, 2.0, 3.0, 5.0]
inf_aper_num: -1
best_aper_num: 0
aperunit: fwhm
separation_fwhm: 1.0
threshold: 3.0
method: sextractor

subtraction: false
bg:
format: map
method: sep
poly_order: 1
sep_box_size: 128
sep_filt_size: 3
wcs:
cross_match_catalog: gaia_dr3
solution_method: scamp
max_catalog_mag: [20.0]
mag_range_catalog: 4.0
min_catalog_stars: 50
max_sources_to_use: [2000, 1000, 500, 200]

zp:
cross_match_catalog: gaia_dr3
max_catalog_mag: [20.0]
Expand All @@ -118,7 +128,7 @@ cutting:
measuring:
annulus_radii: [10, 15]
annulus_units: pixels
chosen_aperture: 0
use_annulus_for_centroids: true
analytical_cuts: ['negatives', 'bad pixels', 'offsets', 'filter bank']
outlier_sigma: 3.0
bad_pixel_radius: 3.0
Expand Down Expand Up @@ -175,6 +185,7 @@ coaddition:
measure_psf: true
threshold: 3.0
method: sextractor
background_method: zero
# The following are used to override the regular astrometric calibration parameters
wcs:
cross_match_catalog: gaia_dr3
Expand Down
147 changes: 147 additions & 0 deletions docs/troubleshooting_sqla.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
## Troubleshooting SQLAlchemy

Here is a growing list of common issues and solutions for SQLAlchemy.

#### Adding this object again causes a unique constraint violation

This is a common issue when you are trying to add an object to the session that is already on the DB.
Instead, use merge, and make sure to assign the merged object to a variable (often with the same name)
and keep using that. There's no real advantage to using `session.add()` over `session.merge()`.

Example:

```python
obj = session.merge(obj)
```

#### Related objects get added to the session (and database) when they are not supposed to

This is a hard one, where a complex web of relationships is causing SQLAlchemy to add objects to the session
when they are not supposed to.
This happens when you `session.merge()` an object, not just on `session.add()`.
This is especially tricky when you are trying to delete a parent, so you merge it first,
and then you end up adding the children instead.
Usually the relationship will merge and then delete the children using cascades,
but some complex relationships may not work that way.
If you notice things are getting added when they shouldn't, check the session state before committing/flushing.

The places to look are:
```python
session.identity_map.keys()
session.new
session.dirty
session.deleted
```

If unwanted objects appear there, try to `session.expunge()` them before committing, or if they are persistent,
you may need to `session.delete()` them instead.

#### Double adding a related object through cascades

Sometimes when a child is merged (or added) into a session, the parent is not automatically added.
Then, when the parent is added to the session on its own, it gets added as a new object, that can trigger
unique violations (or, worse, just add duplicates).

The root of this problem is that the child object is merged without the parent.
Remember that a merged object is a new copy of the original, only connected to the session.
If you don't cascade the merge to the parent, you can't just assign the parent to the new object.
The parent object still keeps a reference to the old child object, and that one is not on the session.
Instead, make sure the merged child is assigned a merged parent, and that the merged parent's
children collection refers to the merged child rather than the stale, pre-merge copy.

#### Cannot access related children when parent is not in the session

This happens when a parent object is not in the session, but you want to access its children.
The error message is usually something like this:

```
sqlalchemy.orm.exc.DetachedInstanceError: Parent instance <Parent at 0x7f7f7f7f7f7f> is not bound to a Session;
lazy load operation of attribute 'children' cannot proceed
```

This happens under three possible circumstances.
1. The relationship is lazy loaded (which we generally try to avoid).
Check the relationship definition has `lazy='selectin'`.
2. The parent object was loaded as a related object itself, and that loading did not recursively load the children.
Most objects will recursively load related objects of related objects, but in some cases this doesn't work,
in particular when there's a many-to-many relationship via an association table (e.g., Provenance.upstreams).
This is fixed by setting the `join_depth=1` or higher, as documented
[here](https://docs.sqlalchemy.org/en/20/orm/self_referential.html#configuring-self-referential-eager-loading)
3. The session has rolled back, or committed (this option only if you've changed to expire_on_commit=True).
We usually have expire_on_commit=False, so that objects do not get expired when the session is committed.
However, when the session is rolled back, all objects are expired. That means you cannot use related objects,
or even regular attributes, after a rollback. In most cases, a rollback is due to some crash, so having some
errors accessing attributes/relationships while handling exceptions and "gracefully" exiting the program is expected,
and doesn't require too much attention. If, however, you explicitly called a rollback, you should expect to have
expired objects, and should go ahead and `session.refresh()` all the objects you need to use.

#### Parent not in session; changes to the children relationship are not written to the database (warning only)

This is a warning that tells you that even though you added / deleted a child object,
the relationship cannot automatically update the object in the database, because the parent
is not connected to a session.

This is sometimes important but a lot of times meaningless. For example, if you deleted Parent,
and then go on to remove the children from it, it makes little difference that the relationship
is no longer emitting SQL changes, because the parent is going to be deleted anyway.


#### `When initializing mapper Mapper[...], expression '...' failed to locate a name `

This happens when a related object class is not imported when the relationship needs to be instantiated.

When two classes, A and B, are related to each other, we would see a definition like this:

```python
class A(Base):
__tablename__ = 'a'
id = Column(Integer, primary_key=True)
b_id = Column(Integer, ForeignKey('b.id'))
b = relationship('B')

class B(Base):
__tablename__ = 'b'
id = Column(Integer, primary_key=True)
a_id = Column(Integer, ForeignKey('a.id'))
a = relationship('A')
```

Notice that the `relationship` function is called with a string argument.
This is because the class `B` is not defined yet when the class `A` is defined.
This solves a "chicken and egg" problem, by making a promise to the mapper that
when the relationships are instantiated, both classes will have been imported.

If some of the related objects are on a different file (module) and that file
is not imported by any of the code you are running, you will get the error above.

This usually happens on scripts and parallel pipelines that only use a subset of the classes.
To fix this, simply import the missing class module at the beginning of the script.


#### Changing the primary key of an object causes update instead of new object

For objects that don't have an auto-incrementing primary key (e.g., Provenance),
the user is in control of the value that goes into the primary key.
Sometimes, the user changes this value, e.g., when a Provenance gets new parameters
and the `update_id()` method is called.

If the object is already in the session, and the primary key is changed, SQLAlchemy
will update the object in the database, instead of creating a new one.
This will remove the old object and may cause problems with objects that relate to
that row in the table.

Make sure to detach your object, or make a brand new one and copy properties over
to the new instance before merging it back into the session as a new object.


#### Deadlocks when querying the database

This can occur when an internal session is querying the same objects
that an external session is using.
In general, you should not be opening an internal session when a different one is open,
instead, pass the session as an argument into the lower scope so all functions use the same session.

If the app freezes, check for a deadlock:
Go into the DB and do `select * from pg_locks;` to see if there are many locks.

Sometimes using `SELECT pg_cancel_backend(pid) FROM pg_locks;` will free the lock.
Otherwise, try to restart the psql service.
Loading

0 comments on commit 8b4a87c

Please sign in to comment.