From cc294505cc85a5605911d185945db6c6886250ee Mon Sep 17 00:00:00 2001 From: Michael Buckley Date: Thu, 14 Dec 2023 18:27:52 -0500 Subject: [PATCH] Merge the fhir_id copy migration with the fhir_id fix to avoid traversing hfj_resource twice. (#5552) Turn off the original migration ForceIdMigrationCopyTask. Fix it anyway so nobody copies bad code. Also add another migration ForceIdMigrationFixTask that fixes the bad data, as well as fills in the fhir_id column for new migrations. --- .../uhn/hapi/fhir/changelog/6_10_1/upgrade.md | 5 +++ .../7_0_0/5546-bad_force_id_spaces.yaml | 3 +- .../tasks/HapiFhirJpaMigrationTasks.java | 13 ++++++- .../jpa/embedded/HapiSchemaMigrationTest.java | 10 +++-- .../taskdef/ForceIdMigrationFixTask.java | 37 ++++++++++++++++++- 5 files changed, 60 insertions(+), 8 deletions(-) diff --git a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/6_10_1/upgrade.md b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/6_10_1/upgrade.md index e69de29bb2d1..701baf518163 100644 --- a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/6_10_1/upgrade.md +++ b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/6_10_1/upgrade.md @@ -0,0 +1,5 @@ +### Major Database Change + +This release contains a migration that covers every resource. +This may take several minutes on a larger system (e.g. 10 minutes for 100 million resources). +For zero-downtime, or for larger systems, we recommend you upgrade the schema using the CLI tools. diff --git a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/7_0_0/5546-bad_force_id_spaces.yaml b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/7_0_0/5546-bad_force_id_spaces.yaml index 16a20b1583e6..714061375129 100644 --- a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/7_0_0/5546-bad_force_id_spaces.yaml +++ b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/7_0_0/5546-bad_force_id_spaces.yaml @@ -1,5 +1,6 @@ --- type: fix issue: 5546 +backport: 6.10.1 title: "A database migration added trailing spaces to server-assigned resource ids. - This fix corrects the migration, and adds another migration to fix the errors." + This fix removes the bad migration, and adds another migration to fix the errors." diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/migrate/tasks/HapiFhirJpaMigrationTasks.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/migrate/tasks/HapiFhirJpaMigrationTasks.java index 0c8481b33585..f75f73cd6d98 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/migrate/tasks/HapiFhirJpaMigrationTasks.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/migrate/tasks/HapiFhirJpaMigrationTasks.java @@ -115,10 +115,19 @@ protected void init700() { // Move forced_id constraints to hfj_resource and the new fhir_id column // Note: we leave the HFJ_FORCED_ID.IDX_FORCEDID_TYPE_FID index in place to support old writers for a while. - version.addTask(new ForceIdMigrationCopyTask(version.getRelease(), "20231018.1")); + version.addTask(new ForceIdMigrationCopyTask(version.getRelease(), "20231018.1").setDoNothing(true)); Builder.BuilderWithTableName hfjResource = version.onTable("HFJ_RESOURCE"); - hfjResource.modifyColumn("20231018.2", "FHIR_ID").nonNullable(); + // commented out to make numeric space for the fix task below. + // This constraint can't be enabled until the column is fully populated, and the shipped version of 20231018.1 + // was broken. + // hfjResource.modifyColumn("20231018.2", "FHIR_ID").nonNullable(); + + // this was inserted after the release. + version.addTask(new ForceIdMigrationFixTask(version.getRelease(), "20231018.3")); + + // added back in place of 20231018.2. If 20231018.2 already ran, this is a no-op. + hfjResource.modifyColumn("20231018.4", "FHIR_ID").nonNullable(); hfjResource.dropIndex("20231027.1", "IDX_RES_FHIR_ID"); hfjResource diff --git a/hapi-fhir-jpaserver-test-utilities/src/test/java/ca/uhn/fhir/jpa/embedded/HapiSchemaMigrationTest.java b/hapi-fhir-jpaserver-test-utilities/src/test/java/ca/uhn/fhir/jpa/embedded/HapiSchemaMigrationTest.java index b9168989bce7..e7f2b2f1bd5a 100644 --- a/hapi-fhir-jpaserver-test-utilities/src/test/java/ca/uhn/fhir/jpa/embedded/HapiSchemaMigrationTest.java +++ b/hapi-fhir-jpaserver-test-utilities/src/test/java/ca/uhn/fhir/jpa/embedded/HapiSchemaMigrationTest.java @@ -22,8 +22,8 @@ import javax.sql.DataSource; import java.sql.SQLException; import java.util.Collections; +import java.util.List; import java.util.Properties; -import java.util.Set; import static ca.uhn.fhir.jpa.embedded.HapiEmbeddedDatabasesExtension.FIRST_TESTED_VERSION; import static ca.uhn.fhir.jpa.migrate.SchemaMigrator.HAPI_FHIR_MIGRATION_TABLENAME; @@ -73,7 +73,7 @@ public void testMigration(DriverTypeEnum theDriverType) throws SQLException { VersionEnum[] allVersions = VersionEnum.values(); - Set dataVersions = Set.of( + List dataVersions = List.of( VersionEnum.V5_2_0, VersionEnum.V5_3_0, VersionEnum.V5_4_0, @@ -129,8 +129,10 @@ private static void migrate(DriverTypeEnum theDriverType, DataSource dataSource, private void verifyForcedIdMigration(DataSource theDataSource) throws SQLException { JdbcTemplate jdbcTemplate = new JdbcTemplate(theDataSource); @SuppressWarnings("DataFlowIssue") - int count = jdbcTemplate.queryForObject("select count(1) from hfj_resource where fhir_id <> trim(fhir_id)", Integer.class); - assertEquals(0, count, "no fhir_id should contain a space"); + int nullCount = jdbcTemplate.queryForObject("select count(1) from hfj_resource where fhir_id is null", Integer.class); + assertEquals(0, nullCount, "no fhir_id should be null"); + int trailingSpaceCount = jdbcTemplate.queryForObject("select count(1) from hfj_resource where fhir_id <> trim(fhir_id)", Integer.class); + assertEquals(0, trailingSpaceCount, "no fhir_id should contain a space"); } diff --git a/hapi-fhir-sql-migrate/src/main/java/ca/uhn/fhir/jpa/migrate/taskdef/ForceIdMigrationFixTask.java b/hapi-fhir-sql-migrate/src/main/java/ca/uhn/fhir/jpa/migrate/taskdef/ForceIdMigrationFixTask.java index f2ec08aa1cd5..86e7e21139a4 100644 --- a/hapi-fhir-sql-migrate/src/main/java/ca/uhn/fhir/jpa/migrate/taskdef/ForceIdMigrationFixTask.java +++ b/hapi-fhir-sql-migrate/src/main/java/ca/uhn/fhir/jpa/migrate/taskdef/ForceIdMigrationFixTask.java @@ -31,6 +31,7 @@ /** * Fix for bad version of {@link ForceIdMigrationCopyTask} * The earlier migration had used at cast to char instead of varchar, which is space-padded on Oracle. + * This migration includes the copy action, but also adds a trim() call to fixup the bad server-assigned ids. */ public class ForceIdMigrationFixTask extends BaseTask { private static final Logger ourLog = LoggerFactory.getLogger(ForceIdMigrationFixTask.class); @@ -62,13 +63,47 @@ protected void doExecute() throws SQLException { // run update in batches. int rowsPerBlock = 50; // hfj_resource has roughly 50 rows per 8k block. int batchSize = rowsPerBlock * 2000; // a few thousand IOPS gives a batch size around a second. + ourLog.info( + "About to migrate ids from {} to {} in batches of size {}", + range.getLeft(), + range.getRight(), + batchSize); for (long batchStart = range.getLeft(); batchStart <= range.getRight(); batchStart = batchStart + batchSize) { long batchEnd = batchStart + batchSize; ourLog.info("Migrating client-assigned ids for pids: {}-{}", batchStart, batchEnd); + /* + We have several cases. Two require no action: + 1. client-assigned id, with correct value in fhir_id and row in hfj_forced_id + 2. server-assigned id, with correct value in fhir_id, no row in hfj_forced_id + And three require action: + 3. client-assigned id, no value in fhir_id, but row in hfj_forced_id + 4. server-assigned id, no value in fhir_id, and row in hfj_forced_id + 5. bad migration - server-assigned id, with wrong space-padded value in fhir_id, no row in hfj_forced_id + */ + executeSql( "hfj_resource", - "update hfj_resource set fhir_id = trim(fhir_id) where res_id >= ? and res_id < ?", + "update hfj_resource " + + // coalesce is varargs and chooses the first non-null value, like || + " set fhir_id = coalesce( " + + + // case 5. + " trim(fhir_id), " + + + // case 3 + " (select f.forced_id from hfj_forced_id f where f.resource_pid = res_id), " + + + // case 4 - use pid as fhir_id + " cast(res_id as varchar(64)) " + + " ) " + + + // avoid useless updates on engines that don't check + // skip case 1, 2. Only check 3,4,5 + " where (fhir_id is null or fhir_id <> trim(fhir_id)) " + + + // chunk range. + " and res_id >= ? and res_id < ?", batchStart, batchEnd); }