Skip to content

Commit

Permalink
Background model (#308)
Browse files Browse the repository at this point in the history
Add Background as an Image data product, add backgrounding and Backgrounder as a configurable pipeline step.
  • Loading branch information
guynir42 authored Jun 21, 2024
1 parent 9d7d977 commit 8b4a87c
Show file tree
Hide file tree
Showing 51 changed files with 2,133 additions and 548 deletions.
86 changes: 86 additions & 0 deletions alembic/versions/2024_06_10_1132-a375526c8260_background_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""background table
Revision ID: a375526c8260
Revises: a7dde2327dde
Create Date: 2024-06-10 11:32:39.717922
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = 'a375526c8260'
down_revision = 'a7dde2327dde'
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Apply the 'background table' migration.

    Creates the ``backgrounds`` table (one row per image+provenance pair,
    holding a scalar background ``value``/``noise`` plus file-storage columns
    for a background map), renames the aperture-number columns on
    ``source_lists``, and replaces the inline background columns on
    ``measurements`` with the new ``bkg_*`` columns.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        'backgrounds',
        # _format/_method are stored as small ints; presumably mapped to
        # enums in the model layer — confirm against models/background.py.
        sa.Column('_format', sa.SMALLINT(), nullable=False),
        sa.Column('_method', sa.SMALLINT(), nullable=False),
        sa.Column('image_id', sa.BigInteger(), nullable=False),
        sa.Column('value', sa.Float(), nullable=False),
        sa.Column('noise', sa.Float(), nullable=False),
        sa.Column('provenance_id', sa.String(), nullable=False),
        sa.Column('created_at', sa.DateTime(), nullable=False),
        sa.Column('modified', sa.DateTime(), nullable=False),
        sa.Column('id', sa.BigInteger(), autoincrement=True, nullable=False),
        # File-on-disk mixin columns: per-extension paths and checksums.
        sa.Column('filepath_extensions', postgresql.ARRAY(sa.Text(), zero_indexes=True), nullable=True),
        sa.Column('md5sum', sa.UUID(), nullable=True),
        sa.Column('md5sum_extensions', postgresql.ARRAY(sa.UUID(), zero_indexes=True), nullable=True),
        sa.Column('filepath', sa.Text(), nullable=False),
        sa.Column('_bitflag', sa.BIGINT(), nullable=False),
        sa.Column('description', sa.Text(), nullable=True),
        sa.Column('_upstream_bitflag', sa.BIGINT(), nullable=False),
        # Deleting an image or provenance cascades to its backgrounds.
        sa.ForeignKeyConstraint(['image_id'], ['images.id'], name='backgrounds_image_id_fkey', ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['provenance_id'], ['provenances.id'], name='backgrounds_provenance_id_fkey', ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id')
    )
    # Unique pair: at most one background per (image, provenance).
    op.create_index('backgrounds_image_id_provenance_index', 'backgrounds', ['image_id', 'provenance_id'], unique=True)
    op.create_index(op.f('ix_backgrounds__bitflag'), 'backgrounds', ['_bitflag'], unique=False)
    op.create_index(op.f('ix_backgrounds__upstream_bitflag'), 'backgrounds', ['_upstream_bitflag'], unique=False)
    op.create_index(op.f('ix_backgrounds_created_at'), 'backgrounds', ['created_at'], unique=False)
    op.create_index(op.f('ix_backgrounds_filepath'), 'backgrounds', ['filepath'], unique=True)
    op.create_index(op.f('ix_backgrounds_id'), 'backgrounds', ['id'], unique=False)
    op.create_index(op.f('ix_backgrounds_image_id'), 'backgrounds', ['image_id'], unique=False)
    op.create_index(op.f('ix_backgrounds_noise'), 'backgrounds', ['noise'], unique=False)
    op.create_index(op.f('ix_backgrounds_provenance_id'), 'backgrounds', ['provenance_id'], unique=False)
    op.create_index(op.f('ix_backgrounds_value'), 'backgrounds', ['value'], unique=False)

    # source_lists: replace the private _inf_aper_num with public
    # inf_aper_num/best_aper_num columns.
    op.add_column('source_lists', sa.Column('inf_aper_num', sa.SMALLINT(), nullable=True))
    op.add_column('source_lists', sa.Column('best_aper_num', sa.SMALLINT(), nullable=True))
    op.drop_column('source_lists', '_inf_aper_num')

    # measurements: rename background/background_err to bkg_mean/bkg_std and
    # add bkg_pix.
    # NOTE(review): adding NOT NULL columns without a server_default fails on
    # a non-empty measurements table — confirm the table is empty at this
    # revision, or add server_default here.
    op.add_column('measurements', sa.Column('bkg_mean', sa.REAL(), nullable=False))
    op.add_column('measurements', sa.Column('bkg_std', sa.REAL(), nullable=False))
    op.add_column('measurements', sa.Column('bkg_pix', sa.REAL(), nullable=False))
    op.drop_column('measurements', 'background')
    op.drop_column('measurements', 'background_err')
    # ### end Alembic commands ###


def downgrade() -> None:
    """Revert the 'background table' migration.

    Mirrors :func:`upgrade` in strict reverse order: restores the old
    ``measurements`` and ``source_lists`` columns, then drops the
    ``backgrounds`` indexes and table.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # NOTE(review): as in upgrade(), re-adding NOT NULL columns without a
    # server_default fails if measurements has rows — confirm.
    op.add_column('measurements', sa.Column('background_err', sa.REAL(), autoincrement=False, nullable=False))
    op.add_column('measurements', sa.Column('background', sa.REAL(), autoincrement=False, nullable=False))
    op.drop_column('measurements', 'bkg_pix')
    op.drop_column('measurements', 'bkg_std')
    op.drop_column('measurements', 'bkg_mean')

    op.add_column('source_lists', sa.Column('_inf_aper_num', sa.SMALLINT(), autoincrement=False, nullable=True))
    op.drop_column('source_lists', 'best_aper_num')
    op.drop_column('source_lists', 'inf_aper_num')

    # Drop indexes before the table they belong to.
    op.drop_index(op.f('ix_backgrounds_value'), table_name='backgrounds')
    op.drop_index(op.f('ix_backgrounds_provenance_id'), table_name='backgrounds')
    op.drop_index(op.f('ix_backgrounds_noise'), table_name='backgrounds')
    op.drop_index(op.f('ix_backgrounds_image_id'), table_name='backgrounds')
    op.drop_index(op.f('ix_backgrounds_id'), table_name='backgrounds')
    op.drop_index(op.f('ix_backgrounds_filepath'), table_name='backgrounds')
    op.drop_index(op.f('ix_backgrounds_created_at'), table_name='backgrounds')
    op.drop_index(op.f('ix_backgrounds__upstream_bitflag'), table_name='backgrounds')
    op.drop_index(op.f('ix_backgrounds__bitflag'), table_name='backgrounds')
    op.drop_index('backgrounds_image_id_provenance_index', table_name='backgrounds')
    op.drop_table('backgrounds')
    # ### end Alembic commands ###
19 changes: 15 additions & 4 deletions default_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,18 +83,28 @@ preprocessing:

extraction:
sources:
method: sextractor
measure_psf: true
apertures: [1.0, 2.0, 3.0, 5.0]
inf_aper_num: -1
best_aper_num: 0
aperunit: fwhm
separation_fwhm: 1.0
threshold: 3.0
method: sextractor

subtraction: false
bg:
format: map
method: sep
poly_order: 1
sep_box_size: 128
sep_filt_size: 3
wcs:
cross_match_catalog: gaia_dr3
solution_method: scamp
max_catalog_mag: [20.0]
mag_range_catalog: 4.0
min_catalog_stars: 50
max_sources_to_use: [2000, 1000, 500, 200]

zp:
cross_match_catalog: gaia_dr3
max_catalog_mag: [20.0]
Expand All @@ -118,7 +128,7 @@ cutting:
measuring:
annulus_radii: [10, 15]
annulus_units: pixels
chosen_aperture: 0
use_annulus_for_centroids: true
analytical_cuts: ['negatives', 'bad pixels', 'offsets', 'filter bank']
outlier_sigma: 3.0
bad_pixel_radius: 3.0
Expand Down Expand Up @@ -175,6 +185,7 @@ coaddition:
measure_psf: true
threshold: 3.0
method: sextractor
background_method: zero
# The following are used to override the regular astrometric calibration parameters
wcs:
cross_match_catalog: gaia_dr3
Expand Down
147 changes: 147 additions & 0 deletions docs/troubleshooting_sqla.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
## Troubleshooting SQLAlchemy

Here is a growing list of common issues and solutions for SQLAlchemy.

#### Adding this object again causes a unique constraint violation

This is a common issue when you are trying to add an object to the session that is already on the DB.
Instead, use merge, and make sure to assign the merged object to a variable (often with the same name)
and keep using that. There's no real advantage to using `session.add()` over `session.merge()`.

Example:

```python
obj = session.merge(obj)
```

#### Related objects get added to the session (and database) when they are not supposed to

This is a hard one, where a complex web of relationships is causing SQLAlchemy to add objects to the session
when they are not supposed to.
This happens when you `session.merge()` an object, not just on `session.add()`.
This is especially tricky when you are trying to delete a parent, so you merge it first,
and then you end up adding the children instead.
Usually the relationship will merge and then delete the children using cascades,
but some complex relationships may not work that way.
If you notice things are getting added when they shouldn't, check the session state before committing/flushing.

The places to look are:
```python
session.identity_map.keys()
session.new
session.dirty
session.deleted
```

If unwanted objects appear there, try to `session.expunge()` them before committing, or if they are persistent,
you may need to `session.delete()` them instead.

#### Double adding a related object through cascades

Sometimes when a child is merged (or added) into a session, the parent is not automatically added.
Then, when the parent is added to the session on its own, it gets added as a new object, that can trigger
unique violations (or, worse, just add duplicates).

The root of this problem is that the child object is merged without the parent.
Remember that a merged object is a new copy of the original, only connected to the session.
If you don't cascade the merge to the parent, you can't just assign the parent to the new object.
The parent object still keeps a reference to the old child object, and that one is not on the session.
Instead, make sure the merged child is assigned a merged parent, and that the merged parent's
children collection refers to the merged child rather than the stale, pre-merge copy.

#### Cannot access related children when parent is not in the session

This happens when a parent object is not in the session, but you want to access its children.
The error message is usually something like this:

```
sqlalchemy.orm.exc.DetachedInstanceError: Parent instance <Parent at 0x7f7f7f7f7f7f> is not bound to a Session;
lazy load operation of attribute 'children' cannot proceed
```

This happens under three possible circumstances.
1. The relationship is lazy loaded (which we generally try to avoid).
Check the relationship definition has `lazy='selectin'`.
2. The parent object was loaded as a related object itself, and that loading did not recursively load the children.
Most objects will recursively load related objects of related objects, but in some cases this doesn't work,
in particular when there's a many-to-many relationship via an association table (e.g., Provenance.upstreams).
This is fixed by setting the `join_depth=1` or higher, as documented
[here](https://docs.sqlalchemy.org/en/20/orm/self_referential.html#configuring-self-referential-eager-loading)
3. The session has rolled back, or committed (this option only if you've changed to expire_on_commit=True).
We usually have expire_on_commit=False, so that objects do not get expired when the session is committed.
However, when the session is rolled back, all objects are expired. That means you cannot use related objects,
or even regular attributes, after a rollback. In most cases, a rollback is due to some crash, so having some
errors accessing attributes/relationships while handling exceptions and "gracefully" exiting the program is expected,
and doesn't require too much attention. If, however, you explicitly called a rollback, you should expect to have
expired objects, and should go ahead and `session.refresh()` all the objects you need to use.

#### Parent not in session; changes to the children relationship are not written to the database (warning only)

This is a warning that tells you that even though you added / deleted a child object,
the relationship cannot automatically update the object in the database, because the parent
is not connected to a session.

This is sometimes important but a lot of times meaningless. For example, if you deleted Parent,
and then go on to remove the children from it, it makes little difference that the relationship
is no longer emitting SQL changes, because the parent is going to be deleted anyway.


#### `When initializing mapper Mapper[...], expression '...' failed to locate a name `

This happens when a related object class is not imported when the relationship needs to be instantiated.

When two classes, A and B, are related to each other, we would see a definition like this:

```python
class A(Base):
__tablename__ = 'a'
id = Column(Integer, primary_key=True)
b_id = Column(Integer, ForeignKey('b.id'))
b = relationship('B')

class B(Base):
__tablename__ = 'b'
id = Column(Integer, primary_key=True)
a_id = Column(Integer, ForeignKey('a.id'))
a = relationship('A')
```

Notice that the `relationship` function is called with a string argument.
This is because the class `B` is not defined yet when the class `A` is defined.
This solves a "chicken and egg" problem, by making a promise to the mapper that
when the relationships are instantiated, both classes will have been imported.

If some of the related objects are on a different file (module) and that file
is not imported by any of the code you are running, you will get the error above.

This usually happens on scripts and parallel pipelines that only use a subset of the classes.
To fix this, simply import the missing class module at the beginning of the script.


#### Changing the primary key of an object causes update instead of new object

For objects that don't have an auto-incrementing primary key (e.g., Provenance),
the user is in control of the value that goes into the primary key.
Sometimes, the user changes this value, e.g., when a Provenance gets new parameters
and the `update_id()` method is called.

If the object is already in the session, and the primary key is changed, SQLAlchemy
will update the object in the database, instead of creating a new one.
This will remove the old object and may cause problems with objects that relate to
that row in the table.

Make sure to detach your object, or make a brand new one and copy properties over
to the new instance before merging it back into the session as a new object.


#### Deadlocks when querying the database

This can occur when an internal session is querying the same objects
that an external session is using.
In general, you should not be opening an internal session when a different one is open,
instead, pass the session as an argument into the lower scope so all functions use the same session.

If the app freezes, check for a deadlock:
Go into the DB and do `select * from pg_locks;` to see if there are many locks.

Sometimes using `SELECT pg_cancel_backend(pid) FROM pg_locks;` will free the lock.
Otherwise, try to restart the psql service.
Loading

0 comments on commit 8b4a87c

Please sign in to comment.