From 8d32795a005d4b92cacba22205a95549751da918 Mon Sep 17 00:00:00 2001 From: Clemens Kolbitsch Date: Tue, 17 Mar 2020 10:07:38 -0700 Subject: [PATCH] Allow specifying table creation order #161 This commit introduces the ability for ghostferry-copydb to create tables in a specific order. This can be useful if tables to be created contain foreign-key constraints (FKCs). NOTE: This does not mean that ghostferry supports FKCs! However, with this feature and by disabling FKCs in the target DB, it is theoretically possible to migrate DBs with FKCs - this is experimental and not recommended for production usage. Use with care! To disable the FKCs, one must add the following config to the target DB configuration: "Params": { "foreign_key_checks": "0" } --- README.md | 12 ++++++++ copydb/config.go | 8 +++++ copydb/copydb.go | 2 +- copydb/test/copydb_test.go | 7 +++++ table_schema_cache.go | 25 ++++++++++++++++ test/go/table_schema_cache_test.go | 48 ++++++++++++++++++++++++++++++ 6 files changed, 101 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4775b988..5db0360e 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,18 @@ Features/fixes added in this fork include this fix has not made it into upstream master yet. - fix [failure to resume](https://github.com/Shopify/ghostferry/issues/156): this fix has not made it into upstream master yet. +- allow specifying [table creation order](https://github.com/Shopify/ghostferry/issues/161): + `Ghostferry` does not allow copying tables using *foreign key constraints*, + because it copies data in batches, which is likely to violate constraints, + leading to failures during the copy phase. + The forked version allows specifying the order in which tables need to be + created in the target database. This allows working around constraints in the + setup phase. 
Additionally, disabling foreign key constraint enforcement on the + target database session/connection allows working around constraints during + the copy phase. + Note that an *incomplete* execution of `Ghostferry` will leave the database in + an inconsistent state until the copy is resumed and completed. + Overview of How it Works ------------------------ diff --git a/copydb/config.go b/copydb/config.go index 1efe2c92..e84267c9 100644 --- a/copydb/config.go +++ b/copydb/config.go @@ -44,6 +44,14 @@ type Config struct { // Filter configuration for tables to copy Tables FilterAndRewriteConfigs + // Specifies the order in which to create database tables as `database.table`. + // Names refer to original databases and tables (that is, before renaming + // occurs). + // If a table is to be created on start and appears in this list, it is + // created before any other table, and is created in the order listed here. + // All tables not specified in this list are created in arbitrary order. + TablesToBeCreatedFirst []string + // If you're running Ghostferry from a read only replica, turn this option // on and specify SourceReplicationMaster and ReplicatedMasterPositionQuery. RunFerryFromReplica bool diff --git a/copydb/copydb.go b/copydb/copydb.go index 4f1a8d9e..c2a5434f 100644 --- a/copydb/copydb.go +++ b/copydb/copydb.go @@ -60,7 +60,7 @@ func (this *CopydbFerry) CreateDatabasesAndTables() error { // We need to create the same table/schemas on the target database // as the ones we are copying. 
logrus.Info("creating databases and tables on target") - for tableName := range this.Ferry.Tables { + for _, tableName := range this.Ferry.Tables.GetTableListWithPriority(this.config.TablesToBeCreatedFirst) { t := strings.Split(tableName, ".") err := this.createDatabaseIfExistsOnTarget(t[0]) diff --git a/copydb/test/copydb_test.go b/copydb/test/copydb_test.go index 22dd806a..d82878c6 100644 --- a/copydb/test/copydb_test.go +++ b/copydb/test/copydb_test.go @@ -85,6 +85,13 @@ func (t *CopydbTestSuite) TestCreateDatabaseAndTableWithRewrites() { t.Require().Equal(renamedTableName, value) } +func (t *CopydbTestSuite) TestCreateDatabaseAndTableWithOrdering() { + // NOTE: Here we just ensure passing a table does not cause issues in the + // invocation. A more thorough test is done in the table-schema tests + t.copydbConfig.TablesToBeCreatedFirst = []string{testSchemaName + "." + testTableName} + t.TestCreateDatabaseAndTableWithRewrites() +} + func TestCopydb(t *testing.T) { testhelpers.SetupTest() suite.Run(t, &CopydbTestSuite{}) diff --git a/table_schema_cache.go b/table_schema_cache.go index 76dd098a..222e580e 100644 --- a/table_schema_cache.go +++ b/table_schema_cache.go @@ -293,6 +293,31 @@ func (c TableSchemaCache) Get(database, table string) *TableSchema { return c[fullTableName(database, table)] } +// Helper to sort a given map of tables with a second list giving a priority. 
+// If an element is present in the input and the priority lists, the item will +// appear first (in the order of the priority list), all other items appear in +// arbitrary (map iteration) order +func (c TableSchemaCache) GetTableListWithPriority(priorityList []string) (prioritzedTableNames []string) { + // just a fast lookup if the list contains items already + contains := map[string]struct{}{} + if len(priorityList) >= 0 { + for _, tableName := range priorityList { + // ignore tables given in the priority list that we don't know + if _, found := c[tableName]; found { + contains[tableName] = struct{}{} + prioritzedTableNames = append(prioritzedTableNames, tableName) + } + } + } + for tableName, _ := range c { + if _, found := contains[tableName]; !found { + prioritzedTableNames = append(prioritzedTableNames, tableName) + } + } + + return +} + func showDatabases(c *sql.DB) ([]string, error) { rows, err := c.Query("show databases") if err != nil { diff --git a/test/go/table_schema_cache_test.go b/test/go/table_schema_cache_test.go index 55fa58b7..0bc9700e 100644 --- a/test/go/table_schema_cache_test.go +++ b/test/go/table_schema_cache_test.go @@ -351,6 +351,54 @@ func (this *TableSchemaCacheTestSuite) TestQuotedTableNameFromString() { this.Require().Equal("``.``", ghostferry.QuotedTableNameFromString("", "")) } +func getMultiTableMap() *ghostferry.TableSchemaCache { + return &ghostferry.TableSchemaCache{ + "schema.table1": &ghostferry.TableSchema{ + Table: &sqlSchema.Table{ + Schema: "schema", + Name: "table1", + }, + }, + "schema.table2": &ghostferry.TableSchema{ + Table: &sqlSchema.Table{ + Schema: "schema", + Name: "table2", + }, + }, + "schema.table3": &ghostferry.TableSchema{ + Table: &sqlSchema.Table{ + Schema: "schema", + Name: "table3", + }, + }, + } +} + +func (this *TableSchemaCacheTestSuite) TestGetTableListWithPriorityNil() { + tables := getMultiTableMap() + // make sure we are not losing any elements, even if the priority does not + // matter + 
creationOrder := tables.GetTableListWithPriority(nil) + this.Require().Equal(len(creationOrder), 3) + this.Require().ElementsMatch(creationOrder, tables.AllTableNames()) +} + +func (this *TableSchemaCacheTestSuite) TestGetTableListWithPriority() { + tables := getMultiTableMap() + creationOrder := tables.GetTableListWithPriority([]string{"schema.table2"}) + this.Require().Equal(len(creationOrder), 3) + this.Require().ElementsMatch(creationOrder, tables.AllTableNames()) + this.Require().Equal(creationOrder[0], "schema.table2") +} + +func (this *TableSchemaCacheTestSuite) TestGetTableListWithPriorityIgnoreUnknown() { + tables := getMultiTableMap() + creationOrder := tables.GetTableListWithPriority([]string{"schema.table2", "schema.unknown_table"}) + this.Require().Equal(len(creationOrder), 3) + this.Require().ElementsMatch(creationOrder, tables.AllTableNames()) + this.Require().Equal(creationOrder[0], "schema.table2") +} + func TestTableSchemaCache(t *testing.T) { testhelpers.SetupTest() suite.Run(t, &TableSchemaCacheTestSuite{GhostferryUnitTestSuite: &testhelpers.GhostferryUnitTestSuite{}})