Skip to content

Commit

Permalink
Merge the fhir_id copy migration with the fhir_id fix to avoid traver…
Browse files Browse the repository at this point in the history
…sing hfj_resource twice. (#5552)

Turn off the original migration ForceIdMigrationCopyTask.
Fix it anyway so nobody copies bad code.
Also add another migration ForceIdMigrationFixTask 
that fixes the bad data, as well as fills in the fhir_id column for new migrations.
  • Loading branch information
michaelabuckley authored Dec 14, 2023
1 parent 71b0987 commit cc29450
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 8 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
### Major Database Change

This release contains a migration that covers every resource.
This may take several minutes on a larger system (e.g. 10 minutes for 100 million resources).
For zero-downtime, or for larger systems, we recommend you upgrade the schema using the CLI tools.
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
type: fix
issue: 5546
backport: 6.10.1
title: "A database migration added trailing spaces to server-assigned resource ids.
This fix corrects the migration, and adds another migration to fix the errors."
This fix removes the bad migration, and adds another migration to fix the errors."
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,19 @@ protected void init700() {

// Move forced_id constraints to hfj_resource and the new fhir_id column
// Note: we leave the HFJ_FORCED_ID.IDX_FORCEDID_TYPE_FID index in place to support old writers for a while.
version.addTask(new ForceIdMigrationCopyTask(version.getRelease(), "20231018.1"));
version.addTask(new ForceIdMigrationCopyTask(version.getRelease(), "20231018.1").setDoNothing(true));

Builder.BuilderWithTableName hfjResource = version.onTable("HFJ_RESOURCE");
hfjResource.modifyColumn("20231018.2", "FHIR_ID").nonNullable();
// commented out to make numeric space for the fix task below.
// This constraint can't be enabled until the column is fully populated, and the shipped version of 20231018.1
// was broken.
// hfjResource.modifyColumn("20231018.2", "FHIR_ID").nonNullable();

// this was inserted after the release.
version.addTask(new ForceIdMigrationFixTask(version.getRelease(), "20231018.3"));

// added back in place of 20231018.2. If 20231018.2 already ran, this is a no-op.
hfjResource.modifyColumn("20231018.4", "FHIR_ID").nonNullable();

hfjResource.dropIndex("20231027.1", "IDX_RES_FHIR_ID");
hfjResource
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
import javax.sql.DataSource;
import java.sql.SQLException;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import java.util.Set;

import static ca.uhn.fhir.jpa.embedded.HapiEmbeddedDatabasesExtension.FIRST_TESTED_VERSION;
import static ca.uhn.fhir.jpa.migrate.SchemaMigrator.HAPI_FHIR_MIGRATION_TABLENAME;
Expand Down Expand Up @@ -73,7 +73,7 @@ public void testMigration(DriverTypeEnum theDriverType) throws SQLException {

VersionEnum[] allVersions = VersionEnum.values();

Set<VersionEnum> dataVersions = Set.of(
List<VersionEnum> dataVersions = List.of(
VersionEnum.V5_2_0,
VersionEnum.V5_3_0,
VersionEnum.V5_4_0,
Expand Down Expand Up @@ -129,8 +129,10 @@ private static void migrate(DriverTypeEnum theDriverType, DataSource dataSource,
private void verifyForcedIdMigration(DataSource theDataSource) throws SQLException {
JdbcTemplate jdbcTemplate = new JdbcTemplate(theDataSource);
@SuppressWarnings("DataFlowIssue")
int count = jdbcTemplate.queryForObject("select count(1) from hfj_resource where fhir_id <> trim(fhir_id)", Integer.class);
assertEquals(0, count, "no fhir_id should contain a space");
int nullCount = jdbcTemplate.queryForObject("select count(1) from hfj_resource where fhir_id is null", Integer.class);
assertEquals(0, nullCount, "no fhir_id should be null");
int trailingSpaceCount = jdbcTemplate.queryForObject("select count(1) from hfj_resource where fhir_id <> trim(fhir_id)", Integer.class);
assertEquals(0, trailingSpaceCount, "no fhir_id should contain a space");
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
/**
* Fix for bad version of {@link ForceIdMigrationCopyTask}
* The earlier migration had used at cast to char instead of varchar, which is space-padded on Oracle.
* This migration includes the copy action, but also adds a trim() call to fixup the bad server-assigned ids.
*/
public class ForceIdMigrationFixTask extends BaseTask {
private static final Logger ourLog = LoggerFactory.getLogger(ForceIdMigrationFixTask.class);
Expand Down Expand Up @@ -62,13 +63,47 @@ protected void doExecute() throws SQLException {
// run update in batches.
int rowsPerBlock = 50; // hfj_resource has roughly 50 rows per 8k block.
int batchSize = rowsPerBlock * 2000; // a few thousand IOPS gives a batch size around a second.
ourLog.info(
"About to migrate ids from {} to {} in batches of size {}",
range.getLeft(),
range.getRight(),
batchSize);
for (long batchStart = range.getLeft(); batchStart <= range.getRight(); batchStart = batchStart + batchSize) {
long batchEnd = batchStart + batchSize;
ourLog.info("Migrating client-assigned ids for pids: {}-{}", batchStart, batchEnd);

/*
We have several cases. Two require no action:
1. client-assigned id, with correct value in fhir_id and row in hfj_forced_id
2. server-assigned id, with correct value in fhir_id, no row in hfj_forced_id
And three require action:
3. client-assigned id, no value in fhir_id, but row in hfj_forced_id
4. server-assigned id, no value in fhir_id, and row in hfj_forced_id
5. bad migration - server-assigned id, with wrong space-padded value in fhir_id, no row in hfj_forced_id
*/

executeSql(
"hfj_resource",
"update hfj_resource set fhir_id = trim(fhir_id) where res_id >= ? and res_id < ?",
"update hfj_resource " +
// coalesce is varargs and chooses the first non-null value, like ||
" set fhir_id = coalesce( "
+
// case 5.
" trim(fhir_id), "
+
// case 3
" (select f.forced_id from hfj_forced_id f where f.resource_pid = res_id), "
+
// case 4 - use pid as fhir_id
" cast(res_id as varchar(64)) "
+ " ) "
+
// avoid useless updates on engines that don't check
// skip case 1, 2. Only check 3,4,5
" where (fhir_id is null or fhir_id <> trim(fhir_id)) "
+
// chunk range.
" and res_id >= ? and res_id < ?",
batchStart,
batchEnd);
}
Expand Down

0 comments on commit cc29450

Please sign in to comment.