From ba048cbba8cb614b615795bb05353e5829e92d21 Mon Sep 17 00:00:00 2001
From: JamesWrigley
Date: Fri, 19 Jan 2024 18:11:38 +0100
Subject: [PATCH 1/3] Apply database changes for dry runs in migrate_images()

This ends up making it a bit easier to test database changes, and it's
safe since the migrations always create a new database anyway.
---
 damnit/migrations.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/damnit/migrations.py b/damnit/migrations.py
index f9dba3ec..96c26717 100644
--- a/damnit/migrations.py
+++ b/damnit/migrations.py
@@ -9,13 +9,16 @@
 from .ctxsupport.ctxrunner import generate_thumbnail, add_to_h5_file, DataType
 
-"""
-Image thumbnails were previously generated by the GUI, now they're generated
-by the backend. This command will convert old image summaries (2D arrays) into
-thumbnails (RGBA arrays) that can be used directly by the GUI.
-"""
-def migrate_images(db, db_dir, dry_run):
-    proposal = db.metameta.get("proposal")
+def migrate_images(new_db, db_dir, dry_run):
+    """
+    Image thumbnails were previously generated by the GUI, now they're generated
+    by the backend. This function will convert old image summaries (2D arrays) into
+    thumbnails (RGBA arrays) that can be used directly by the GUI.
+
+    Be careful to pass in the *new database* handle instead of the old one,
+    since this will modify the DB even when dry_run=True.
+    """
+    proposal = new_db.metameta.get("proposal")
     if proposal is None:
         raise RuntimeError("Database must have a proposal configured for it to be migrated.")
@@ -51,8 +54,7 @@ def migrate_images(db, db_dir, dry_run):
 
     # And then update the summaries in the database
     for run, run_reduced_data in reduced_data.items():
-        if not dry_run:
-            add_to_db(run_reduced_data, db, proposal, run)
+        add_to_db(run_reduced_data, new_db, proposal, run)
 
     info = f"updated {len(reduced_data)} variables in {len(files_modified)} files"
     if dry_run:

From 6602850c5d6fdd0927a24878d22210322b14fbfa Mon Sep 17 00:00:00 2001
From: JamesWrigley
Date: Fri, 19 Jan 2024 18:12:40 +0100
Subject: [PATCH 2/3] Move table-copying code in the migrations to
 `copy_table()`

---
 damnit/migrations.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/damnit/migrations.py b/damnit/migrations.py
index 96c26717..42e6c2d5 100644
--- a/damnit/migrations.py
+++ b/damnit/migrations.py
@@ -141,6 +141,18 @@ def main_dataset(grp: h5py.Group):
 
     return grp[candidates.pop()]
 
+def copy_table(table, old_db, new_db):
+    """Copy an entire table from one database to another."""
+    rows = old_db.conn.execute(f"SELECT * FROM {table}").fetchall()
+    if len(rows) == 0:
+        return
+
+    placeholder = ", ".join(["?" for _ in rows[0]])
+    new_db.conn.executemany(f"""
+        INSERT INTO {table}
+        VALUES ({placeholder})
+    """, rows)
+
 def migrate_v0_to_v1(db, db_dir, dry_run):
     """
     For reference, see the V0_SCHEMA variable in db.py.
@@ -209,15 +221,7 @@ def migrate_v0_to_v1(db, db_dir, dry_run):
 
     # Copy the user-editable variables and standalone comments
     for table in ["variables", "time_comments"]:
-        rows = db.conn.execute(f"SELECT * FROM {table}").fetchall()
-        if len(rows) == 0:
-            continue
-
-        placeholder = ", ".join(["?" for _ in rows[0]])
-        new_db.conn.executemany(f"""
-            INSERT INTO {table}
-            VALUES ({placeholder})
-        """, rows)
+        copy_table(table, db, new_db)
 
     # Load the data into the new database
     total_vars = 0
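
The pattern `copy_table()` factors out above relies on SQLite positional
placeholders: one "?" per column, with executemany() doing the bulk insert.
For context, here is a minimal standalone sketch of the same idea using plain
sqlite3 connections rather than DamnitDB handles; the `variables` table schema
and sample row are made up for illustration:

    import sqlite3

    def copy_table(table, old_conn, new_conn):
        """Copy an entire table from one database to another.

        Assumes the table already exists in the destination with the same
        column order, since the INSERT uses positional placeholders.
        """
        rows = old_conn.execute(f"SELECT * FROM {table}").fetchall()
        if len(rows) == 0:
            return

        # One "?" per column, e.g. "?, ?" for a two-column table
        placeholder = ", ".join(["?" for _ in rows[0]])
        new_conn.executemany(f"INSERT INTO {table} VALUES ({placeholder})", rows)
        new_conn.commit()

    old = sqlite3.connect(":memory:")
    new = sqlite3.connect(":memory:")
    for conn in (old, new):
        conn.execute("CREATE TABLE variables (name TEXT, title TEXT)")
    old.execute("INSERT INTO variables VALUES ('energy', 'Energy [keV]')")

    copy_table("variables", old, new)
    print(new.execute("SELECT * FROM variables").fetchall())
    # -> [('energy', 'Energy [keV]')]

Because the placeholders are positional rather than named, this only works when
source and destination tables share a column order, which holds here since the
migrations create the new tables from the current schema.
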
From 3048e2c7e45f1fafd2736ed1f15f84b12c2018ef Mon Sep 17 00:00:00 2001
From: JamesWrigley
Date: Fri, 19 Jan 2024 18:13:13 +0100
Subject: [PATCH 3/3] Add a migration for the short-lived intermediate v1
 format

---
 damnit/cli.py        |  9 ++++-
 damnit/migrations.py | 89 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/damnit/cli.py b/damnit/cli.py
index ba196f6d..3e8e92e7 100644
--- a/damnit/cli.py
+++ b/damnit/cli.py
@@ -149,6 +149,11 @@ def main():
         "v0-to-v1",
         help="Migrate the SQLite database and HDF5 files from v0 to v1."
     )
+    migrate_subparsers.add_parser(
+        "intermediate-v1",
+        help="Migrate the SQLite database and HDF5 files from an initial implementation of v1 to the final"
+             " v1. Don't use this unless you know what you're doing."
+    )
 
     args = ap.parse_args()
     logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO,
@@ -241,12 +246,14 @@ def main():
 
     elif args.subcmd == "migrate":
         from .backend.db import DamnitDB
-        from .migrations import migrate_v0_to_v1
+        from .migrations import migrate_intermediate_v1, migrate_v0_to_v1
 
         db = DamnitDB(allow_old=True)
         if args.migrate_subcmd == "v0-to-v1":
             migrate_v0_to_v1(db, Path.cwd(), args.dry_run)
+        elif args.migrate_subcmd == "intermediate-v1":
+            migrate_intermediate_v1(db, Path.cwd(), args.dry_run)
 
 if __name__ == '__main__':
     sys.exit(main())
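
The cli.py hunk above only wires up the parser and the dispatch. As a rough
standalone sketch of that nested-subcommand pattern (not the project's actual
cli.py; in particular, attaching `--dry-run` to the `migrate` subparser is a
guess at details the hunk doesn't show):

    import argparse

    ap = argparse.ArgumentParser(prog="damnit")
    subparsers = ap.add_subparsers(dest="subcmd", required=True)

    # The "migrate" subcommand gets its own nested subparsers
    migrate_ap = subparsers.add_parser("migrate", help="Migrate the database format.")
    migrate_ap.add_argument("--dry-run", action="store_true",
                            help="Report what would change without writing anything.")
    migrate_subparsers = migrate_ap.add_subparsers(dest="migrate_subcmd", required=True)
    migrate_subparsers.add_parser("v0-to-v1")
    migrate_subparsers.add_parser("intermediate-v1")

    args = ap.parse_args(["migrate", "--dry-run", "intermediate-v1"])
    print(args.subcmd, args.migrate_subcmd, args.dry_run)
    # -> migrate intermediate-v1 True
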
diff --git a/damnit/migrations.py b/damnit/migrations.py
index 42e6c2d5..3eeca658 100644
--- a/damnit/migrations.py
+++ b/damnit/migrations.py
@@ -279,3 +279,92 @@ def migrate_v0_to_v1(db, db_dir, dry_run):
         new_db_path.rename(db_path)
         print(f"New format DB created and moved to {db_path.name}")
         print(f"Old database backed up as {backup_path.name}")
+
+def migrate_intermediate_v1(db, db_dir, dry_run):
+    """Migrate intermediate v1 (v0.5) databases.
+
+    Before v1 rose over the world, resplendent and glorious, there was a humble
+    antecedent that was used for some proposals:
+    - p3338 (FXE)
+    - p6616 (FXE)
+    - p4507 (FXE)
+    - p5639 (SXP)
+    - p4656 (MID)
+    - p3118 (MID)
+    - p6976 (MID)
+    - p4559 (MID)
+    - p5397 (MID)
+    - p4239 (MID)
+    - p4442 (MID)
+    - p2956 (SCS)
+
+    To push these databases into their destiny of v1 we must make some changes:
+    - Remove the `run_variables.stored_type` column
+    - Re-do the image migration to convert the thumbnails to PNGs
+    - Move the `stored_type` attribute on `.reduced/<name>` datasets to a
+      `_damnit_objtype` attribute on the `<name>` group.
+    """
+    # Create a new database, overwriting any previous attempts
+    new_db_path = db_dir / "runs.v1.sqlite"
+    new_db_path.unlink(missing_ok=True)
+    new_db = DamnitDB(new_db_path)
+
+    # Copy everything but `run_variables` to the new database
+    for k, v in db.metameta.items():
+        if k != "data_format_version":
+            new_db.metameta[k] = v
+
+    for table in ["run_info", "time_comments", "variables"]:
+        copy_table(table, db, new_db)
+
+    # Note that we skip the `stored_type` column because that was removed
+    run_variables = db.conn.execute("""
+        SELECT proposal, run, name, version, value, timestamp, max_diff, provenance
+        FROM run_variables
+    """).fetchall()
+    for row in run_variables:
+        new_db.conn.execute("""
+            INSERT INTO run_variables (proposal, run, name, version, value, timestamp, max_diff, provenance)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+        """, row)
+
+    new_db.update_views()
+
+    # Convert the old `stored_type` attribute into `_damnit_objtype`
+    runs = db.conn.execute("SELECT proposal, run FROM runs").fetchall()
+    for proposal, run_no in runs:
+        h5_path = db_dir / "extracted_data" / f"p{proposal}_r{run_no}.h5"
+        if not h5_path.is_file():
+            continue
+
+        with add_to_h5_file(h5_path) as f:
+            reduced = f[".reduced"]
+            for ds_name, dset in reduced.items():
+                if "stored_type" in dset.attrs:
+                    stored_type = dset.attrs["stored_type"]
+
+                    obj_type = None
+                    if stored_type in ["DataArray", "Dataset", "image", "timestamp"]:
+                        obj_type = stored_type.lower()
+
+                    if not dry_run:
+                        if obj_type is not None:
+                            f[ds_name].attrs["_damnit_objtype"] = obj_type
+                        del dset.attrs["stored_type"]
+
+    # Migrate images to use PNGs for thumbnails
+    migrate_images(new_db, db_dir, dry_run)
+
+    new_db.close()
+    db.close()
+
+    if dry_run:
+        print(f"Dry-run: new format DB created at {new_db_path.name}")
+        print("If all seems OK, re-run the migration without --dry-run.")
+    else:
+        db_path = db_dir / DB_NAME
+        backup_path = db_dir / "runs.intermediate-v1-backup.sqlite"
+        db_path.rename(backup_path)
+        new_db_path.rename(db_path)
+        print(f"New format DB created and moved to {db_path.name}")
+        print(f"Old database backed up as {backup_path.name}")
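
The attribute move in migrate_intermediate_v1() is easiest to picture on a toy
file. A minimal sketch of the same logic using plain h5py on a throwaway file
instead of the project's add_to_h5_file() helper; the file name and the
`my_image` variable are invented for illustration:

    import h5py
    import numpy as np

    # Set up a file in the old layout: a thumbnail dataset under
    # .reduced/<name> carrying a `stored_type` attribute, plus the
    # top-level <name> group for the full data.
    with h5py.File("example.h5", "w") as f:
        ds = f.create_dataset(".reduced/my_image", data=np.zeros((10, 10)))
        ds.attrs["stored_type"] = "image"
        f.create_group("my_image")

    # The migration: move the type marker onto the top-level group under
    # the new `_damnit_objtype` name, and drop the old attribute.
    with h5py.File("example.h5", "a") as f:
        for ds_name, dset in f[".reduced"].items():
            if "stored_type" not in dset.attrs:
                continue

            stored_type = dset.attrs["stored_type"]
            if stored_type in ["DataArray", "Dataset", "image", "timestamp"]:
                f[ds_name].attrs["_damnit_objtype"] = stored_type.lower()
            del dset.attrs["stored_type"]

    with h5py.File("example.h5", "r") as f:
        print(f["my_image"].attrs["_damnit_objtype"])         # image
        print("stored_type" in f[".reduced/my_image"].attrs)  # False

Unlike the real migration, this sketch always rewrites the file; the patch only
does so when dry_run is false, since unlike the SQLite side there is no fresh
copy of the HDF5 files to scribble on.
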