Merge pull request #176 from European-XFEL/intermediate-migration
Add a migration for the short-lived intermediate v1 format
JamesWrigley authored Jan 20, 2024
2 parents d2a948d + 3048e2c commit 68b6a01
Showing 2 changed files with 121 additions and 19 deletions.
9 changes: 8 additions & 1 deletion damnit/cli.py
@@ -149,6 +149,11 @@ def main():
"v0-to-v1",
help="Migrate the SQLite database and HDF5 files from v0 to v1."
)
migrate_subparsers.add_parser(
"intermediate-v1",
help="Migrate the SQLite database HDF5 files from an initial implementation of v1 to the final"
" v1. Don't use this unless you know what you're doing."
)

args = ap.parse_args()
logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO,
@@ -241,12 +246,14 @@ def main():

elif args.subcmd == "migrate":
from .backend.db import DamnitDB
from .migrations import migrate_v0_to_v1
from .migrations import migrate_intermediate_v1, migrate_v0_to_v1

db = DamnitDB(allow_old=True)

if args.migrate_subcmd == "v0-to-v1":
migrate_v0_to_v1(db, Path.cwd(), args.dry_run)
elif args.migrate_subcmd == "intermediate-v1":
migrate_intermediate_v1(db, Path.cwd(), args.dry_run)

if __name__ == '__main__':
sys.exit(main())
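
For reference, the new dispatch above amounts to roughly the following when run from a proposal's database directory. This is a minimal sketch rather than part of the commit: the package-level import paths are inferred from the relative imports in cli.py, and it is assumed that DamnitDB() with no path argument opens the existing runs database in the current working directory.

# Sketch of what the `migrate intermediate-v1` subcommand does (assumptions noted above).
from pathlib import Path

from damnit.backend.db import DamnitDB
from damnit.migrations import migrate_intermediate_v1

db = DamnitDB(allow_old=True)  # open the old-format database, as cli.py does
# dry_run=True builds runs.v1.sqlite alongside the old DB but does not replace it;
# pass dry_run=False to actually swap the databases and keep a backup.
migrate_intermediate_v1(db, Path.cwd(), dry_run=True)
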
131 changes: 113 additions & 18 deletions damnit/migrations.py
@@ -9,13 +9,16 @@
from .ctxsupport.ctxrunner import generate_thumbnail, add_to_h5_file, DataType


"""
Image thumbnails were previously generated by the GUI, now they're generated
by the backend. This command will convert old image summaries (2D arrays) into
thumbnails (RGBA arrays) that can be used directly by the GUI.
"""
def migrate_images(db, db_dir, dry_run):
proposal = db.metameta.get("proposal")
def migrate_images(new_db, db_dir, dry_run):
"""
Image thumbnails were previously generated by the GUI, now they're generated
by the backend. This function will convert old image summaries (2D arrays) into
thumbnails (RGBA arrays) that can be used directly by the GUI.

Be careful to pass in the *new database* handle instead of the old one,
since this will modify the DB even when dry_run=True.
"""
proposal = new_db.metameta.get("proposal")
if proposal is None:
raise RuntimeError("Database must have a proposal configured for it to be migrated.")

@@ -51,8 +54,7 @@ def migrate_images(db, db_dir, dry_run):

# And then update the summaries in the database
for run, run_reduced_data in reduced_data.items():
if not dry_run:
add_to_db(run_reduced_data, db, proposal, run)
add_to_db(run_reduced_data, new_db, proposal, run)

info = f"updated {len(reduced_data)} variables in {len(files_modified)} files"
if dry_run:
@@ -139,6 +141,18 @@ def main_dataset(grp: h5py.Group):
return grp[candidates.pop()]


def copy_table(table, old_db, new_db):
"""Copy an entire table from one database to another."""
rows = old_db.conn.execute(f"SELECT * FROM {table}").fetchall()
if len(rows) == 0:
return

placeholder = ", ".join(["?" for _ in rows[0]])
new_db.conn.executemany(f"""
INSERT INTO {table}
VALUES ({placeholder})
""", rows)

def migrate_v0_to_v1(db, db_dir, dry_run):
"""
For reference, see the V0_SCHEMA variable in db.py.
@@ -207,15 +221,7 @@ def migrate_v0_to_v1(db, db_dir, dry_run):

# Copy the user-editable variables and standalone comments
for table in ["variables", "time_comments"]:
rows = db.conn.execute(f"SELECT * FROM {table}").fetchall()
if len(rows) == 0:
continue

placeholder = ", ".join(["?" for _ in rows[0]])
new_db.conn.executemany(f"""
INSERT INTO {table}
VALUES ({placeholder})
""", rows)
copy_table(table, db, new_db)

# Load the data into the new database
total_vars = 0
@@ -273,3 +279,92 @@ def migrate_v0_to_v1(db, db_dir, dry_run):
new_db_path.rename(db_path)
print(f"New format DB created and moved to {db_path.name}")
print(f"Old database backed up as {backup_path.name}")

def migrate_intermediate_v1(db, db_dir, dry_run):
"""Migrate intermediate v1 (v0.5) databases.
Before v1 rose over the world, resplendent and glorious, there was a humble
antecedent that was used for some proposals:
- p3338 (FXE)
- p6616 (FXE)
- p4507 (FXE)
- p5639 (SXP)
- p4656 (MID)
- p3118 (MID)
- p6976 (MID)
- p4559 (MID)
- p5397 (MID)
- p4239 (MID)
- p4442 (MID)
- p2956 (SCS)

To push these databases into their destiny of v1, we must make some changes:
- Remove the `run_variables.stored_type` column
- Re-do image migration to convert the thumbnails to PNGs
- Move the `stored_type` attribute on `.reduced/<var>` datasets to a
`_damnit_objtype` attribute on the `<var>` group.
"""
# Create a new database, overwriting any previous attempts
new_db_path = db_dir / "runs.v1.sqlite"
new_db_path.unlink(missing_ok=True)
new_db = DamnitDB(new_db_path)

# Copy everything but `run_variables` to the new database
for k, v in db.metameta.items():
if k != "data_format_version":
new_db.metameta[k] = v

for table in ["run_info", "time_comments", "variables"]:
copy_table(table, db, new_db)

# Note that we skip the `stored_type` column because that was removed
run_variables = db.conn.execute("""
SELECT proposal, run, name, version, value, timestamp, max_diff, provenance
FROM run_variables
""").fetchall()
for row in run_variables:
new_db.conn.execute("""
INSERT INTO run_variables (proposal, run, name, version, value, timestamp, max_diff, provenance)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""", row)

new_db.update_views()

# Convert the old `stored_type` attribute into `_damnit_objtype`
runs = db.conn.execute("SELECT proposal, run FROM runs").fetchall()
for proposal, run_no in runs:
h5_path = db_dir / "extracted_data" / f"p{proposal}_r{run_no}.h5"
if not h5_path.is_file():
continue

with add_to_h5_file(h5_path) as f:
reduced = f[".reduced"]
for ds_name, dset in reduced.items():
if "stored_type" in dset.attrs:
stored_type = dset.attrs["stored_type"]

obj_type = None
if stored_type in ["DataArray", "Dataset", "image", "timestamp"]:
obj_type = stored_type.lower()

if not dry_run:
if obj_type is not None:
f[ds_name].attrs["_damnit_objtype"] = obj_type
del dset.attrs["stored_type"]

# Migrate images to use PNGs for thumbnails
migrate_images(new_db, db_dir, dry_run)

new_db.close()
db.close()

if dry_run:
print(f"Dry-run: new format DB created at {new_db_path.name}")
print("If all seems OK, re-run the migration without --dry-run.")
else:
db_path = db_dir / DB_NAME
backup_path = db_dir / "runs.intermediate-v1-backup.sqlite"
db_path.rename(backup_path)
new_db_path.rename(db_path)
print(f"New format DB created and moved to {db_path.name}")
print(f"Old database backed up as {backup_path.name}")
