From 2de8002e4e7f8ad705ee5e1fcc500ab4611b2848 Mon Sep 17 00:00:00 2001 From: Morgan Tocker Date: Thu, 18 Apr 2024 10:12:15 -0600 Subject: [PATCH 1/4] Support invisible indexes better --- pkg/migration/migration.go | 1 + pkg/migration/runner.go | 11 +++++ pkg/migration/runner_test.go | 78 ++++++++++++++++++++++++++++++++++++ pkg/utils/utils.go | 27 ++++++++++++- pkg/utils/utils_test.go | 22 ++++++++++ 5 files changed, 138 insertions(+), 1 deletion(-) diff --git a/pkg/migration/migration.go b/pkg/migration/migration.go index ee19f06..e167f3b 100644 --- a/pkg/migration/migration.go +++ b/pkg/migration/migration.go @@ -18,6 +18,7 @@ type Migration struct { Threads int `name:"threads" help:"Number of concurrent threads for copy and checksum tasks" optional:"" default:"4"` TargetChunkTime time.Duration `name:"target-chunk-time" help:"The target copy time for each chunk" optional:"" default:"500ms"` ForceInplace bool `name:"force-inplace" help:"Force attempt to use inplace (only safe without replicas or with Aurora Global)" optional:"" default:"false"` + ForceInvisible bool `name:"force-invisible" help:"Allow invisible index changes even if they can not be made immediately" optional:"" default:"false"` Checksum bool `name:"checksum" help:"Checksum new table before final cut-over" optional:"" default:"true"` ReplicaDSN string `name:"replica-dsn" help:"A DSN for a replica which (if specified) will be used for lag checking." optional:""` ReplicaMaxLag time.Duration `name:"replica-max-lag" help:"The maximum lag allowed on the replica before the migration throttles." optional:"" default:"120s"` diff --git a/pkg/migration/runner.go b/pkg/migration/runner.go index 1c0c0ef..6fa2751 100644 --- a/pkg/migration/runner.go +++ b/pkg/migration/runner.go @@ -226,6 +226,17 @@ func (r *Runner) Run(originalCtx context.Context) error { } } + // Unless Force invisibility is enabled, we don't want to allow visibility changes + // This is because we've already attempted MySQL DDL as instant, and it didn't work. + // It could be because the user is combining this operation with other unsafe operations, + // which is not a good idea. We need to protect them by not allowing it. + // https://github.com/cashapp/spirit/issues/283 + if !r.migration.ForceInvisible { + if err := utils.AlterContainsIndexVisibility("ALTER TABLE unused " + r.migration.Alter); err != nil { + return err + } + } + // Run post-setup checks if err := r.runChecks(ctx, check.ScopePostSetup); err != nil { return err diff --git a/pkg/migration/runner_test.go b/pkg/migration/runner_test.go index 46abf5d..06314b0 100644 --- a/pkg/migration/runner_test.go +++ b/pkg/migration/runner_test.go @@ -2698,3 +2698,81 @@ func TestForNonInstantBurn(t *testing.T) { assert.False(t, m.usedInstantDDL) // it would have had to apply a copy. assert.Equal(t, 0, rowVersions()) // confirm we reset to zero, not 1 (no burn) } + +// From https://github.com/cashapp/spirit/issues/283 +// ALTER INDEX .. VISIBLE is INPLACE which is really weird. +// it only makes sense to be instant, so we attempt it as instant if we know +// it's with a set of safe changes. If it's not with a set of safe changes, +// then we error by default. +func TestIndexVisibility(t *testing.T) { + cfg, err := mysql.ParseDSN(testutils.DSN()) + assert.NoError(t, err) + + testutils.RunSQL(t, `DROP TABLE IF EXISTS indexvisibility`) + table := `CREATE TABLE indexvisibility ( + id int(11) NOT NULL AUTO_INCREMENT, + b INT NOT NULL, + PRIMARY KEY (id), + INDEX (b) + )` + testutils.RunSQL(t, table) + m, err := NewRunner(&Migration{ + Host: cfg.Addr, + Username: cfg.User, + Password: cfg.Passwd, + Database: cfg.DBName, + Threads: 1, + Table: "indexvisibility", + Alter: "ALTER INDEX b INVISIBLE", + }) + assert.NoError(t, err) + err = m.Run(context.Background()) + assert.NoError(t, err) + + assert.True(t, m.usedInplaceDDL) // expected to count as safe. + + // Test again with visible + m, err = NewRunner(&Migration{ + Host: cfg.Addr, + Username: cfg.User, + Password: cfg.Passwd, + Database: cfg.DBName, + Threads: 1, + Table: "indexvisibility", + Alter: "ALTER INDEX b VISIBLE", + }) + assert.NoError(t, err) + err = m.Run(context.Background()) + assert.NoError(t, err) + assert.True(t, m.usedInplaceDDL) // expected to count as safe. + + // Test again but include an unsafe change at the same time. + m, err = NewRunner(&Migration{ + Host: cfg.Addr, + Username: cfg.User, + Password: cfg.Passwd, + Database: cfg.DBName, + Threads: 1, + Table: "indexvisibility", + Alter: "ALTER INDEX b VISIBLE, ADD newcol INT", + }) + assert.NoError(t, err) + err = m.Run(context.Background()) + assert.Error(t, err) + assert.NoError(t, m.Close()) // it's errored, we don't need to try again. We can close. + + // But we will allow the same as before as long as the ForceInvisible flag is set. + m, err = NewRunner(&Migration{ + Host: cfg.Addr, + Username: cfg.User, + Password: cfg.Passwd, + Database: cfg.DBName, + Threads: 1, + Table: "indexvisibility", + Alter: "ALTER INDEX b VISIBLE, ADD newcol INT", + ForceInvisible: true, + }) + assert.NoError(t, err) + err = m.Run(context.Background()) + assert.NoError(t, err) +} diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index 39b6365..ac62d17 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -88,7 +88,8 @@ func AlgorithmInplaceConsideredSafe(sql string) error { for _, spec := range alterStmt.Specs { switch spec.Tp { case ast.AlterTableDropIndex, - ast.AlterTableRenameIndex: + ast.AlterTableRenameIndex, + ast.AlterTableIndexInvisible: continue default: unsafeClauses++ @@ -157,6 +158,30 @@ func AlterContainsAddUnique(sql string) error { return nil } +// AlterContainsIndexVisibility checks to see if there are any clauses of an ALTER to change index visibility. +// It really does not make sense for visibility changes to be anything except metadata only changes, +// because they are used for experiments. An experiment is not rebuilding the table. If you are experimenting +// setting an index to invisible and plan to switch it back to visible quickly if required, going through +// a full table rebuild does not make sense. +func AlterContainsIndexVisibility(sql string) error { + p := parser.New() + stmtNodes, _, err := p.Parse(sql, "", "") + if err != nil { + return err + } + stmt := &stmtNodes[0] + alterStmt, ok := (*stmt).(*ast.AlterTableStmt) + if !ok { + return err + } + for _, spec := range alterStmt.Specs { + if spec.Tp == ast.AlterTableIndexInvisible { + return fmt.Errorf("the ALTER operation contains a change to index visibility and could not be completed as a meta-data only operation. Use --force-invisible to override this safety check if absolutely necessary") + } + } + return nil +} + func TrimAlter(alter string) string { return strings.TrimSuffix(strings.TrimSpace(alter), ";") } diff --git a/pkg/utils/utils_test.go b/pkg/utils/utils_test.go index bf0407c..d996e10 100644 --- a/pkg/utils/utils_test.go +++ b/pkg/utils/utils_test.go @@ -56,6 +56,8 @@ func TestAlgorithmInplaceConsideredSafe(t *testing.T) { assert.NoError(t, AlgorithmInplaceConsideredSafe(alter("rename index `a` to `b`"))) assert.NoError(t, AlgorithmInplaceConsideredSafe(alter("drop index `a`, drop index `b`"))) assert.NoError(t, AlgorithmInplaceConsideredSafe(alter("drop index `a`, rename index `b` to c"))) + assert.NoError(t, AlgorithmInplaceConsideredSafe(alter("ALTER INDEX b INVISIBLE"))) + assert.NoError(t, AlgorithmInplaceConsideredSafe(alter("ALTER INDEX b VISIBLE"))) assert.Error(t, AlgorithmInplaceConsideredSafe(alter("ADD COLUMN `a` INT"))) assert.Error(t, AlgorithmInplaceConsideredSafe(alter("ADD index (a)"))) @@ -64,6 +66,7 @@ func TestAlgorithmInplaceConsideredSafe(t *testing.T) { // this *should* be safe, but we don't support it yet because we can't // guess which operations are INSTANT assert.Error(t, AlgorithmInplaceConsideredSafe(alter("drop index `a`, add column `b` int"))) + assert.Error(t, AlgorithmInplaceConsideredSafe(alter("ALTER INDEX b INVISIBLE, add column `c` int"))) } func TestAlterIsAddUnique(t *testing.T) { @@ -82,6 +85,25 @@ func TestAlterIsAddUnique(t *testing.T) { assert.Error(t, AlterContainsAddUnique(alter("add unique(b)"))) // this is potentially lossy. } +func TestAlterContainsIndexVisibility(t *testing.T) { + var alter = func(stmt string) string { + return "ALTER TABLE `test`.`t1` " + stmt + } + assert.NoError(t, AlterContainsIndexVisibility(alter("drop index `a`"))) + assert.NoError(t, AlterContainsIndexVisibility(alter("rename index `a` to `b`"))) + assert.NoError(t, AlterContainsIndexVisibility(alter("drop index `a`, drop index `b`"))) + assert.NoError(t, AlterContainsIndexVisibility(alter("drop index `a`, rename index `b` to c"))) + + assert.NoError(t, AlterContainsIndexVisibility(alter("ADD COLUMN `a` INT"))) + assert.NoError(t, AlterContainsIndexVisibility(alter("ADD index (a)"))) + assert.NoError(t, AlterContainsIndexVisibility(alter("drop index `a`, add index `b` (`b`)"))) + assert.NoError(t, AlterContainsIndexVisibility(alter("engine=innodb"))) + assert.NoError(t, AlterContainsIndexVisibility(alter("add unique(b)"))) + + assert.Error(t, AlterContainsIndexVisibility(alter("ALTER INDEX b INVISIBLE"))) + assert.Error(t, AlterContainsIndexVisibility(alter("ALTER INDEX b VISIBLE"))) +} + func TestTrimAlter(t *testing.T) { assert.Equal(t, "ADD COLUMN `a` INT", TrimAlter("ADD COLUMN `a` INT")) assert.Equal(t, "engine=innodb", TrimAlter("engine=innodb;")) From 6fa3fef079b13db98255d2080c828ce3179af689 Mon Sep 17 00:00:00 2001 From: Morgan Tocker Date: Thu, 18 Apr 2024 10:17:29 -0600 Subject: [PATCH 2/4] Add docs --- USAGE.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/USAGE.md b/USAGE.md index ba0676c..cadadfc 100644 --- a/USAGE.md +++ b/USAGE.md @@ -98,6 +98,15 @@ When set to `TRUE`, Spirit will attempt to perform the schema change using MySQL Even when force-inplace is `FALSE`, Spirit automatically detects "safe" operations that use the `INPLACE` algorithm. These include operations that modify only metadata, specifically `DROP KEY/INDEX` and `RENAME KEY/INDEX`. Consult https://dev.mysql.com/doc/refman/8.0/en/innodb-online-ddl-operations.html for more details. +### force-inplace + +- Type: Boolean +- Default value: FALSE + +By default, Spirit will only allow changes to [index visibility](https://dev.mysql.com/doc/refman/8.3/en/invisible-indexes.html) to occur as meta-data only operations. This is because invisible indexes, really just supports the use-case of experimentation. If you are attempting to change an index to invisible and it causes a full copy, this is quite a surprising behavior. It is also unsafe if you consider the reciprocal: setting the index back to visible as a copy could take substantial time. + +When set to `TRUE`, Spirit will disable this safety check and allow index visibility changes to be performed as copy operations. + ### checksum - Type: Boolean From 115470bc282ffb5f26e6421d461fbde6ab599b02 Mon Sep 17 00:00:00 2001 From: Morgan Tocker Date: Thu, 18 Apr 2024 13:54:11 -0600 Subject: [PATCH 3/4] Update test and docs to optionless behavior --- USAGE.md | 11 +------- pkg/migration/migration.go | 1 - pkg/migration/runner.go | 12 ++++---- pkg/migration/runner_test.go | 55 +++++++++++++++++++++++++++--------- pkg/utils/utils.go | 2 +- 5 files changed, 48 insertions(+), 33 deletions(-) diff --git a/USAGE.md b/USAGE.md index cadadfc..43415a6 100644 --- a/USAGE.md +++ b/USAGE.md @@ -96,16 +96,7 @@ Note that Spirit does not support dynamically adjusting the target-chunk-time wh When set to `TRUE`, Spirit will attempt to perform the schema change using MySQL's `INPLACE` algorithm, before falling back to performing its usual copy process. `INPLACE` is non-blocking on the system where the DDL is initiated, but it will block on binary-log based read replicas. This means it's typically only safe to enable if you have no read replicas, or your read replicas are based on physical log shipping (i.e. Aurora). -Even when force-inplace is `FALSE`, Spirit automatically detects "safe" operations that use the `INPLACE` algorithm. These include operations that modify only metadata, specifically `DROP KEY/INDEX` and `RENAME KEY/INDEX`. Consult https://dev.mysql.com/doc/refman/8.0/en/innodb-online-ddl-operations.html for more details. - -### force-inplace - -- Type: Boolean -- Default value: FALSE - -By default, Spirit will only allow changes to [index visibility](https://dev.mysql.com/doc/refman/8.3/en/invisible-indexes.html) to occur as meta-data only operations. This is because invisible indexes, really just supports the use-case of experimentation. If you are attempting to change an index to invisible and it causes a full copy, this is quite a surprising behavior. It is also unsafe if you consider the reciprocal: setting the index back to visible as a copy could take substantial time. - -When set to `TRUE`, Spirit will disable this safety check and allow index visibility changes to be performed as copy operations. +Even when force-inplace is `FALSE`, Spirit automatically detects "safe" operations that use the `INPLACE` algorithm. These include operations that modify only metadata, specifically `ALTER INDEX .. VISIBLE/INVISIBLE`, `DROP KEY/INDEX` and `RENAME KEY/INDEX`. Consult https://dev.mysql.com/doc/refman/8.0/en/innodb-online-ddl-operations.html for more details. ### checksum diff --git a/pkg/migration/migration.go b/pkg/migration/migration.go index e167f3b..ee19f06 100644 --- a/pkg/migration/migration.go +++ b/pkg/migration/migration.go @@ -18,7 +18,6 @@ type Migration struct { Threads int `name:"threads" help:"Number of concurrent threads for copy and checksum tasks" optional:"" default:"4"` TargetChunkTime time.Duration `name:"target-chunk-time" help:"The target copy time for each chunk" optional:"" default:"500ms"` ForceInplace bool `name:"force-inplace" help:"Force attempt to use inplace (only safe without replicas or with Aurora Global)" optional:"" default:"false"` - ForceInvisible bool `name:"force-invisible" help:"Allow invisible index changes even if they can not be made immediately" optional:"" default:"false"` Checksum bool `name:"checksum" help:"Checksum new table before final cut-over" optional:"" default:"true"` ReplicaDSN string `name:"replica-dsn" help:"A DSN for a replica which (if specified) will be used for lag checking." optional:""` ReplicaMaxLag time.Duration `name:"replica-max-lag" help:"The maximum lag allowed on the replica before the migration throttles." optional:"" default:"120s"` diff --git a/pkg/migration/runner.go b/pkg/migration/runner.go index 6fa2751..e26d66c 100644 --- a/pkg/migration/runner.go +++ b/pkg/migration/runner.go @@ -226,15 +226,13 @@ func (r *Runner) Run(originalCtx context.Context) error { } } - // Unless Force invisibility is enabled, we don't want to allow visibility changes - // This is because we've already attempted MySQL DDL as instant, and it didn't work. - // It could be because the user is combining this operation with other unsafe operations, + // We don't want to allow visibility changes + // This is because we've already attempted MySQL DDL as INPLACE, and it didn't work. + // It likely means the user is combining this operation with other unsafe operations, // which is not a good idea. We need to protect them by not allowing it. // https://github.com/cashapp/spirit/issues/283 - if !r.migration.ForceInvisible { - if err := utils.AlterContainsIndexVisibility("ALTER TABLE unused " + r.migration.Alter); err != nil { - return err - } + if err := utils.AlterContainsIndexVisibility("ALTER TABLE unused " + r.migration.Alter); err != nil { + return err } // Run post-setup checks diff --git a/pkg/migration/runner_test.go b/pkg/migration/runner_test.go index 06314b0..1bc919b 100644 --- a/pkg/migration/runner_test.go +++ b/pkg/migration/runner_test.go @@ -2701,9 +2701,15 @@ func TestForNonInstantBurn(t *testing.T) { // From https://github.com/cashapp/spirit/issues/283 // ALTER INDEX .. VISIBLE is INPLACE which is really weird. -// it only makes sense to be instant, so we attempt it as instant if we know -// it's with a set of safe changes. If it's not with a set of safe changes, -// then we error by default. +// it only makes sense to be instant, so we attempt it as a "safe inplace". +// If it's not with a set of safe changes, then we error. +// This means the user is expected to split their DDL into two separate ALTERs. +// +// There is a partial workaround for users to use --force-inplace, which would +// help only if the other included changes are also INPLACE and not copy. +// We *do* document this under --force-inplace docs, but it's +// really not a typical use case to ever mix invisible with any other change. +// i.e. if anything it's more a side-effect than a workaround. func TestIndexVisibility(t *testing.T) { cfg, err := mysql.ParseDSN(testutils.DSN()) assert.NoError(t, err) @@ -2712,6 +2718,7 @@ func TestIndexVisibility(t *testing.T) { table := `CREATE TABLE indexvisibility ( id int(11) NOT NULL AUTO_INCREMENT, b INT NOT NULL, + c INT NOT NULL, PRIMARY KEY (id), INDEX (b) )` @@ -2746,7 +2753,8 @@ func TestIndexVisibility(t *testing.T) { assert.NoError(t, err) assert.True(t, m.usedInplaceDDL) // expected to count as safe. - // Test again but include an unsafe change at the same time. + // Test again but include an unsafe INPLACE change at the same time. + // This won't work by default. m, err = NewRunner(&Migration{ Host: cfg.Addr, Username: cfg.User, @@ -2754,25 +2762,44 @@ func TestIndexVisibility(t *testing.T) { Database: cfg.DBName, Threads: 1, Table: "indexvisibility", - Alter: "ALTER INDEX b VISIBLE, ADD newcol INT", + Alter: "ALTER INDEX b VISIBLE, ADD INDEX (c)", }) assert.NoError(t, err) err = m.Run(context.Background()) assert.Error(t, err) assert.NoError(t, m.Close()) // it's errored, we don't need to try again. We can close. - // But we will allow the same as before as long as the ForceInvisible flag is set. + // But we will allow the above when force inplace is set. m, err = NewRunner(&Migration{ - Host: cfg.Addr, - Username: cfg.User, - Password: cfg.Passwd, - Database: cfg.DBName, - Threads: 1, - Table: "indexvisibility", - Alter: "ALTER INDEX b VISIBLE, ADD newcol INT", - ForceInvisible: true, + Host: cfg.Addr, + Username: cfg.User, + Password: cfg.Passwd, + Database: cfg.DBName, + Threads: 1, + Table: "indexvisibility", + Alter: "ALTER INDEX b VISIBLE, ADD INDEX (c)", + ForceInplace: true, }) assert.NoError(t, err) err = m.Run(context.Background()) assert.NoError(t, err) + + // But even when force inplace is set, we won't be able to do an operation + // that requires a full copy. This is important because invisible should + // never be mixed with copy (the semantics are weird since it's for experiments). + m, err = NewRunner(&Migration{ + Host: cfg.Addr, + Username: cfg.User, + Password: cfg.Passwd, + Database: cfg.DBName, + Threads: 1, + Table: "indexvisibility", + Alter: "ALTER INDEX b VISIBLE, CHANGE c BIGINT NOT NULL", + ForceInplace: true, + }) + assert.NoError(t, err) + err = m.Run(context.Background()) + assert.Error(t, err) + assert.NoError(t, m.Close()) // it's errored, we don't need to try again. We can close. + } diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index ac62d17..04a2a22 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -176,7 +176,7 @@ func AlterContainsIndexVisibility(sql string) error { } for _, spec := range alterStmt.Specs { if spec.Tp == ast.AlterTableIndexInvisible { - return fmt.Errorf("the ALTER operation contains a change to index visibility and could not be completed as a meta-data only operation. Use --force-invisible to override this safety check if absolutely necessary") + return fmt.Errorf("the ALTER operation contains a change to index visibility and could not be completed as a meta-data only operation. This is a safety check! Please split the ALTER statement into separate statements for changing the invisible index and other operations") } } return nil From ee4decb4aeae0425f489195ef91d51e2cb346b69 Mon Sep 17 00:00:00 2001 From: Morgan Tocker Date: Thu, 18 Apr 2024 13:57:42 -0600 Subject: [PATCH 4/4] fix linter --- pkg/migration/runner_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/migration/runner_test.go b/pkg/migration/runner_test.go index 1bc919b..5e2b0d0 100644 --- a/pkg/migration/runner_test.go +++ b/pkg/migration/runner_test.go @@ -2801,5 +2801,4 @@ func TestIndexVisibility(t *testing.T) { err = m.Run(context.Background()) assert.Error(t, err) assert.NoError(t, m.Close()) // it's errored, we don't need to try again. We can close. - }