Skip to content

Commit

Permalink
Allow specifying table creation order Shopify#161
Browse files Browse the repository at this point in the history
This commit introduces the ability for ghostferry-copydb to create
tables in a specific order. This can be useful if tables to be created
contain foreign-key constraints (FKCs).

NOTE: This does not mean that ghostferry supports FKCs! However, with
this feature and by disabling FKCs in the target DB, it is
theoretically possible to migrate DBs with FKCs - this is experimental
and not recommended for production usage. Use with care!

To disable the FKCs, one must add the following config to the target DB
configuration:

    "Params": {
        "foreign_key_checks": "0"
    }
  • Loading branch information
Clemens Kolbitsch committed Mar 23, 2020
1 parent e3384bb commit 8d32795
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 1 deletion.
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,18 @@ Features/fixes added in this fork include
this fix has not made it into upstream master yet.
- fix [failure to resume](https://github.com/Shopify/ghostferry/issues/156):
this fix has not made it into upstream master yet.
- allow specifying [table creation order](https://github.com/Shopify/ghostferry/issues/161):
`Ghostferry` does not allow copying tables using *foreign key constraints*,
because it copies data in batches, which is likely to violate constraints,
leading to failures during the copy phase.
The forked version allows specifying the order in which tables need to be
created in the target database. This allows working around constraints in the
setup phase. Additionally disabling foreign key constraint enforcement on the
target database session/connection allows working around constraints during
the copy phase.
Note that an *incomplete* execution of `Ghostferry` will leave the database in
an inconsistent state until the copy is resumed and completed.


Overview of How it Works
------------------------
Expand Down
8 changes: 8 additions & 0 deletions copydb/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ type Config struct {
// Filter configuration for tables to copy
Tables FilterAndRewriteConfigs

// Specifies the order in which to create database tables, given as
// "<db>.<table>" names. Names refer to the original databases and tables
// (that is, before any renaming occurs).
// If a table is to be created on start and appears in this list, it is
// created before any table that is not in the list, in the order listed here.
// All tables not specified in this list are created in arbitrary order.
TablesToBeCreatedFirst []string

// If you're running Ghostferry from a read only replica, turn this option
// on and specify SourceReplicationMaster and ReplicatedMasterPositionQuery.
RunFerryFromReplica bool
Expand Down
2 changes: 1 addition & 1 deletion copydb/copydb.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func (this *CopydbFerry) CreateDatabasesAndTables() error {
// We need to create the same table/schemas on the target database
// as the ones we are copying.
logrus.Info("creating databases and tables on target")
for tableName := range this.Ferry.Tables {
for _, tableName := range this.Ferry.Tables.GetTableListWithPriority(this.config.TablesToBeCreatedFirst) {
t := strings.Split(tableName, ".")

err := this.createDatabaseIfExistsOnTarget(t[0])
Expand Down
7 changes: 7 additions & 0 deletions copydb/test/copydb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,13 @@ func (t *CopydbTestSuite) TestCreateDatabaseAndTableWithRewrites() {
t.Require().Equal(renamedTableName, value)
}

// TestCreateDatabaseAndTableWithOrdering re-runs the rewrite test with the
// test table listed in TablesToBeCreatedFirst.
func (t *CopydbTestSuite) TestCreateDatabaseAndTableWithOrdering() {
	// This is only a smoke test: it checks that configuring a prioritized
	// table does not break database/table creation. The ordering logic itself
	// is covered by the table-schema tests.
	prioritizedTable := testSchemaName + "." + testTableName
	t.copydbConfig.TablesToBeCreatedFirst = []string{prioritizedTable}

	t.TestCreateDatabaseAndTableWithRewrites()
}

func TestCopydb(t *testing.T) {
testhelpers.SetupTest()
suite.Run(t, &CopydbTestSuite{})
Expand Down
25 changes: 25 additions & 0 deletions table_schema_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,31 @@ func (c TableSchemaCache) Get(database, table string) *TableSchema {
return c[fullTableName(database, table)]
}

// GetTableListWithPriority returns the names of all tables in the cache,
// placing the tables named in priorityList first.
//
// Entries of priorityList that are present in the cache appear at the front
// of the result, in the order of their first occurrence in priorityList.
// Entries that are not in the cache are ignored, and repeated entries are
// emitted only once so that no table is ever listed twice. All remaining
// tables follow in map iteration order, which Go leaves unspecified.
//
// A nil or empty priorityList is valid and yields all table names in
// unspecified order. An empty cache yields a nil slice.
func (c TableSchemaCache) GetTableListWithPriority(priorityList []string) (prioritzedTableNames []string) {
	if len(c) == 0 {
		// nothing to list; none of the priority entries can be known either
		return nil
	}

	// every known table ends up in the result exactly once
	prioritzedTableNames = make([]string, 0, len(c))

	// names already emitted: used both to drop duplicates in the priority
	// list and to skip prioritized tables in the second pass
	emitted := make(map[string]struct{}, len(priorityList))
	for _, tableName := range priorityList {
		// ignore tables given in the priority list that we don't know
		if _, known := c[tableName]; !known {
			continue
		}
		// a table listed more than once keeps its first position only
		if _, duplicate := emitted[tableName]; duplicate {
			continue
		}
		emitted[tableName] = struct{}{}
		prioritzedTableNames = append(prioritzedTableNames, tableName)
	}

	for tableName := range c {
		if _, done := emitted[tableName]; !done {
			prioritzedTableNames = append(prioritzedTableNames, tableName)
		}
	}

	return prioritzedTableNames
}

func showDatabases(c *sql.DB) ([]string, error) {
rows, err := c.Query("show databases")
if err != nil {
Expand Down
48 changes: 48 additions & 0 deletions test/go/table_schema_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,54 @@ func (this *TableSchemaCacheTestSuite) TestQuotedTableNameFromString() {
this.Require().Equal("``.``", ghostferry.QuotedTableNameFromString("", ""))
}

// getMultiTableMap builds a schema cache fixture holding three tables
// ("table1", "table2" and "table3") that all live in the database "schema".
// Each entry is keyed by its "<schema>.<table>" name.
func getMultiTableMap() *ghostferry.TableSchemaCache {
	const schemaName = "schema"

	cache := ghostferry.TableSchemaCache{}
	for _, tableName := range []string{"table1", "table2", "table3"} {
		cache[schemaName+"."+tableName] = &ghostferry.TableSchema{
			Table: &sqlSchema.Table{
				Schema: schemaName,
				Name:   tableName,
			},
		}
	}
	return &cache
}

// TestGetTableListWithPriorityNil checks that a nil priority list still
// yields every table of the cache.
func (this *TableSchemaCacheTestSuite) TestGetTableListWithPriorityNil() {
	tables := getMultiTableMap()
	// make sure we are not losing any elements, even if the priority does not
	// matter
	creationOrder := tables.GetTableListWithPriority(nil)
	// Len reports the slice contents on failure, and the expected value goes
	// first in ElementsMatch, so failure messages are not misleading.
	this.Require().Len(creationOrder, 3)
	this.Require().ElementsMatch(tables.AllTableNames(), creationOrder)
}

// TestGetTableListWithPriority checks that a table named in the priority list
// is returned first and that no other table is lost.
func (this *TableSchemaCacheTestSuite) TestGetTableListWithPriority() {
	tables := getMultiTableMap()
	creationOrder := tables.GetTableListWithPriority([]string{"schema.table2"})
	// Len reports the slice contents on failure, and the expected value goes
	// first in ElementsMatch/Equal, so failure messages are not misleading.
	this.Require().Len(creationOrder, 3)
	this.Require().ElementsMatch(tables.AllTableNames(), creationOrder)
	this.Require().Equal("schema.table2", creationOrder[0])
}

// TestGetTableListWithPriorityIgnoreUnknown checks that a priority entry
// naming a table that is not in the cache is ignored: it must not appear in
// the result, and the known prioritized table must still come first.
func (this *TableSchemaCacheTestSuite) TestGetTableListWithPriorityIgnoreUnknown() {
	tables := getMultiTableMap()
	creationOrder := tables.GetTableListWithPriority([]string{"schema.table2", "schema.unknown_table"})
	// Len reports the slice contents on failure, and the expected value goes
	// first in ElementsMatch/Equal, so failure messages are not misleading.
	this.Require().Len(creationOrder, 3)
	this.Require().ElementsMatch(tables.AllTableNames(), creationOrder)
	this.Require().Equal("schema.table2", creationOrder[0])
}

func TestTableSchemaCache(t *testing.T) {
testhelpers.SetupTest()
suite.Run(t, &TableSchemaCacheTestSuite{GhostferryUnitTestSuite: &testhelpers.GhostferryUnitTestSuite{}})
Expand Down

0 comments on commit 8d32795

Please sign in to comment.