-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Background as an Image data product, add backgrounding and Backgrounder as a configurable pipeline step.
- Loading branch information
Showing
51 changed files
with
2,133 additions
and
548 deletions.
There are no files selected for viewing
86 changes: 86 additions & 0 deletions
86
alembic/versions/2024_06_10_1132-a375526c8260_background_table.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
"""background table | ||
Revision ID: a375526c8260 | ||
Revises: a7dde2327dde | ||
Create Date: 2024-06-10 11:32:39.717922 | ||
""" | ||
from alembic import op | ||
import sqlalchemy as sa | ||
from sqlalchemy.dialects import postgresql | ||
|
||
# revision identifiers, used by Alembic. | ||
revision = 'a375526c8260' | ||
down_revision = 'a7dde2327dde' | ||
branch_labels = None | ||
depends_on = None | ||
|
||
|
||
def upgrade() -> None:
    """Apply revision a375526c8260.

    Creates the ``backgrounds`` table with its indexes, replaces
    ``source_lists._inf_aper_num`` with ``inf_aper_num`` / ``best_aper_num``,
    and replaces ``measurements.background`` / ``background_err`` with
    ``bkg_mean`` / ``bkg_std`` / ``bkg_pix``.

    The operations were originally auto generated by Alembic; their order
    and arguments are kept as generated.
    """
    table = 'backgrounds'

    # Columns first, then table-level constraints, in the generated order.
    table_items = [
        sa.Column('_format', sa.SMALLINT(), nullable=False),
        sa.Column('_method', sa.SMALLINT(), nullable=False),
        sa.Column('image_id', sa.BigInteger(), nullable=False),
        sa.Column('value', sa.Float(), nullable=False),
        sa.Column('noise', sa.Float(), nullable=False),
        sa.Column('provenance_id', sa.String(), nullable=False),
        sa.Column('created_at', sa.DateTime(), nullable=False),
        sa.Column('modified', sa.DateTime(), nullable=False),
        sa.Column('id', sa.BigInteger(), autoincrement=True, nullable=False),
        sa.Column(
            'filepath_extensions',
            postgresql.ARRAY(sa.Text(), zero_indexes=True),
            nullable=True,
        ),
        sa.Column('md5sum', sa.UUID(), nullable=True),
        sa.Column(
            'md5sum_extensions',
            postgresql.ARRAY(sa.UUID(), zero_indexes=True),
            nullable=True,
        ),
        sa.Column('filepath', sa.Text(), nullable=False),
        sa.Column('_bitflag', sa.BIGINT(), nullable=False),
        sa.Column('description', sa.Text(), nullable=True),
        sa.Column('_upstream_bitflag', sa.BIGINT(), nullable=False),
        sa.ForeignKeyConstraint(
            ['image_id'],
            ['images.id'],
            name='backgrounds_image_id_fkey',
            ondelete='CASCADE',
        ),
        sa.ForeignKeyConstraint(
            ['provenance_id'],
            ['provenances.id'],
            name='backgrounds_provenance_id_fkey',
            ondelete='CASCADE',
        ),
        sa.PrimaryKeyConstraint('id'),
    ]
    op.create_table(table, *table_items)

    # Composite unique index over (image_id, provenance_id).
    op.create_index(
        'backgrounds_image_id_provenance_index',
        table,
        ['image_id', 'provenance_id'],
        unique=True,
    )

    # Single-column indexes, named ix_backgrounds_<column>; only the
    # filepath index is unique.
    single_column_indexes = (
        ('_bitflag', False),
        ('_upstream_bitflag', False),
        ('created_at', False),
        ('filepath', True),
        ('id', False),
        ('image_id', False),
        ('noise', False),
        ('provenance_id', False),
        ('value', False),
    )
    for column_name, is_unique in single_column_indexes:
        op.create_index(
            op.f(f'ix_backgrounds_{column_name}'),
            table,
            [column_name],
            unique=is_unique,
        )

    # source_lists: add the two new nullable columns, then drop the old
    # private one. Values in _inf_aper_num are not copied over.
    for column_name in ('inf_aper_num', 'best_aper_num'):
        op.add_column('source_lists', sa.Column(column_name, sa.SMALLINT(), nullable=True))
    op.drop_column('source_lists', '_inf_aper_num')

    # measurements: add the three bkg_* columns, then drop the old pair.
    # NOTE(review): these columns are NOT NULL with no server_default; on a
    # non-empty `measurements` table PostgreSQL would presumably reject the
    # ADD COLUMN -- confirm the table is empty when this migration runs.
    for column_name in ('bkg_mean', 'bkg_std', 'bkg_pix'):
        op.add_column('measurements', sa.Column(column_name, sa.REAL(), nullable=False))
    for column_name in ('background', 'background_err'):
        op.drop_column('measurements', column_name)
|
||
|
||
def downgrade() -> None:
    """Revert revision a375526c8260.

    Restores ``measurements.background`` / ``background_err`` and
    ``source_lists._inf_aper_num``, drops the columns added by the upgrade,
    then removes the ``backgrounds`` indexes and the table itself.

    The operations were originally auto generated by Alembic; their order
    and arguments are kept as generated.
    """
    table = 'backgrounds'

    # measurements: bring back the old pair, then drop the bkg_* columns.
    # NOTE(review): the restored columns are NOT NULL with no server_default;
    # on a non-empty `measurements` table PostgreSQL would presumably reject
    # the ADD COLUMN -- confirm before relying on this downgrade.
    for column_name in ('background_err', 'background'):
        op.add_column(
            'measurements',
            sa.Column(column_name, sa.REAL(), autoincrement=False, nullable=False),
        )
    for column_name in ('bkg_pix', 'bkg_std', 'bkg_mean'):
        op.drop_column('measurements', column_name)

    # source_lists: restore the private column, then drop the new ones.
    op.add_column(
        'source_lists',
        sa.Column('_inf_aper_num', sa.SMALLINT(), autoincrement=False, nullable=True),
    )
    for column_name in ('best_aper_num', 'inf_aper_num'):
        op.drop_column('source_lists', column_name)

    # Drop the single-column indexes (ix_backgrounds_<column>) in the
    # reverse of the order used by upgrade().
    indexed_columns = (
        'value',
        'provenance_id',
        'noise',
        'image_id',
        'id',
        'filepath',
        'created_at',
        '_upstream_bitflag',
        '_bitflag',
    )
    for column_name in indexed_columns:
        op.drop_index(op.f(f'ix_backgrounds_{column_name}'), table_name=table)

    # Finally the composite index and the table.
    op.drop_index('backgrounds_image_id_provenance_index', table_name=table)
    op.drop_table(table)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
## Troubleshooting SQLAlchemy | ||
|
||
Here is a growing list of common issues and solutions for SQLAlchemy. | ||
|
||
#### Adding this object again causes a unique constraint violation | ||
|
||
This is a common issue when you are trying to add an object to the session that is already on the DB. | ||
Instead, use merge, and make sure to assign the merged object to a variable (often with the same name) | ||
and keep using that. There's no real advantage to using `session.add()` over `session.merge()`. | ||
|
||
Example: | ||
|
||
```python | ||
obj = session.merge(obj) | ||
``` | ||
|
||
#### Related objects get added to the session (and database) when they are not supposed to | ||
|
||
This is a hard one, where a complex web of relationships is causing SQLAlchemy to add objects to the session | ||
when they are not supposed to. | ||
This happens when you `session.merge()` an object, not just on `session.add()`. | ||
This is especially tricky when you are trying to delete a parent, so you merge it first, | ||
and then you end up adding the children instead. | ||
Usually the relationship will merge and then delete the children using cascades, | ||
but some complex relationships may not work that way. | ||
If you notice things are getting added when they shouldn't, check the session state before committing/flushing. | ||
|
||
The places to look are: | ||
```python | ||
session.identity_map.keys() | ||
session.new | ||
session.dirty | ||
session.deleted | ||
``` | ||
|
||
If unwanted objects appear there, try to `session.expunge()` them before committing, or if they are persistent, | ||
you may need to `session.delete()` them instead. | ||
|
||
#### Double adding a related object through cascades | ||
|
||
Sometimes when a child is merged (or added) into a session, the parent is not automatically added. | ||
Then, when the parent is added to the session on its own, it gets added as a new object, which can trigger | ||
unique violations (or, worse, just add duplicates). | ||
|
||
The root of this problem is that the child object is merged without the parent. | ||
Remember that a merged object is a new copy of the original, only connected to the session. | ||
If you don't cascade the merge to the parent, you can't just assign the parent to the new object. | ||
The parent object still keeps a reference to the old child object, and that one is not on the session. | ||
Instead, make sure the merged child is assigned a merged parent, and that the parent is related | ||
to the merged child rather than to the old copy that is not on the session. | ||
|
||
#### Cannot access related children when parent is not in the session | ||
|
||
This happens when a parent object is not in the session, but you want to access its children. | ||
The error message is usually something like this: | ||
|
||
``` | ||
sqlalchemy.orm.exc.DetachedInstanceError: Parent instance <Parent at 0x7f7f7f7f7f7f> is not bound to a Session; | ||
lazy load operation of attribute 'children' cannot proceed | ||
``` | ||
|
||
This happens under three possible circumstances. | ||
1. The relationship is lazy loaded (which we generally try to avoid). | ||
Check the relationship definition has `lazy='selectin'`. | ||
2. The parent object was loaded as a related object itself, and that loading did not recursively load the children. | ||
Most objects will recursively load related objects of related objects, but in some cases this doesn't work, | ||
in particular when there's a many-to-many relationship via an association table (e.g., Provenance.upstreams). | ||
This is fixed by setting the `join_depth=1` or higher, as documented | ||
[here](https://docs.sqlalchemy.org/en/20/orm/self_referential.html#configuring-self-referential-eager-loading) | ||
3. The session has rolled back, or committed (this option only if you've changed to expire_on_commit=True). | ||
We usually have expire_on_commit=False, so that objects do not get expired when the session is committed. | ||
However, when the session is rolled back, all objects are expired. That means you cannot use related objects, | ||
or even regular attributes, after a rollback. In most cases, a rollback is due to some crash, so having some | ||
errors accessing attributes/relationships while handling exceptions and "gracefully" exiting the program is expected, | ||
and doesn't require too much attention. If, however, you explicitly called a rollback, you should expect to have | ||
expired objects, and should go ahead and `session.refresh()` all the objects you need to use. | ||
|
||
#### Parent not in session, update along children is not applied to the database (Warning only) | ||
|
||
This is a warning that tells you that even though you added / deleted a child object, | ||
the relationship cannot automatically update the object in the database, because the parent | ||
is not connected to a session. | ||
|
||
This is sometimes important but a lot of times meaningless. For example, if you deleted Parent, | ||
and then go on to remove the children from it, it makes little difference that the relationship | ||
is no longer emitting SQL changes, because the parent is going to be deleted anyway. | ||
|
||
|
||
#### `When initializing mapper Mapper[...], expression '...' failed to locate a name` | ||
|
||
This happens when a related object class is not imported when the relationship needs to be instantiated. | ||
|
||
When two classes, A and B, are related to each other, we would see a definition like this: | ||
|
||
```python | ||
class A(Base): | ||
__tablename__ = 'a' | ||
id = Column(Integer, primary_key=True) | ||
b_id = Column(Integer, ForeignKey('b.id')) | ||
b = relationship('B') | ||
|
||
class B(Base): | ||
__tablename__ = 'b' | ||
id = Column(Integer, primary_key=True) | ||
a_id = Column(Integer, ForeignKey('a.id')) | ||
a = relationship('A') | ||
``` | ||
|
||
Notice that the `relationship` function is called with a string argument. | ||
This is because the class `B` is not defined yet when the class `A` is defined. | ||
This solves a "chicken and egg" problem, by making a promise to the mapper that | ||
when the relationships are instantiated, both classes will have been imported. | ||
|
||
If some of the related objects are on a different file (module) and that file | ||
is not imported by any of the code you are running, you will get the error above. | ||
|
||
This usually happens on scripts and parallel pipelines that only use a subset of the classes. | ||
To fix this, simply import the missing class module at the beginning of the script. | ||
|
||
|
||
#### Changing the primary key of an object causes update instead of new object | ||
|
||
For objects that don't have an auto-incrementing primary key (e.g., Provenance), | ||
the user is in control of the value that goes into the primary key. | ||
Sometimes, the user changes this value, e.g., when a Provenance gets new parameters | ||
and the `update_id()` method is called. | ||
|
||
If the object is already in the session, and the primary key is changed, SQLAlchemy | ||
will update the object in the database, instead of creating a new one. | ||
This will remove the old object and may cause problems with objects that relate to | ||
that row in the table. | ||
|
||
Make sure to detach your object, or make a brand new one and copy properties over | ||
to the new instance before merging it back into the session as a new object. | ||
|
||
|
||
#### Deadlocks when querying the database | ||
|
||
This can occur when an internal session is querying the same objects | ||
that an external session is using. | ||
In general, you should not open an internal session while a different one is open; | ||
instead, pass the session as an argument into the lower scope so all functions use the same session. | ||
|
||
If the app freezes, check for a deadlock: | ||
Go into the DB and do `select * from pg_locks;` to see if there are many locks. | ||
|
||
Sometimes using `SELECT pg_cancel_backend(pid) FROM pg_locks;` will free the lock. | ||
Otherwise, try to restart the psql service. |
Oops, something went wrong.