From aa87ac9bb316f4e477d3bac857a3601e804ec45f Mon Sep 17 00:00:00 2001 From: Nikita Zheleztsov Date: Wed, 20 Dec 2023 17:23:40 +0300 Subject: [PATCH] replicaset: soften name validation During upgrade to Tarantool 3.0.0 there is a period when the instance name has not been set yet. If vshard strictly validates names on connection, the cluster becomes unavailable until all names are configured. This commit softens the check and allows the name to be nil when the instance UUID is specified in the configuration; only the UUID is validated in that case. In conclusion, we have the following checks on connection: 1. UUID is validated in the following cases: a. When `identification_mode` is `uuid_as_key` b. When `identification_mode` is `name_as_key` and replica.uuid is specified 2. Instance name is validated only when `identification_mode` is `name_as_key` and can work in the following modes: a. Strict validation (nil instance name is not allowed), when replica.uuid is not specified. b. Soft validation (nil instance name is allowed), when replica.uuid is specified. Follow-up #426 NO_DOC=internal --- test/replicaset-luatest/replicaset_3_test.lua | 14 ++++++++++++-- vshard/replicaset.lua | 10 +++++++--- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/test/replicaset-luatest/replicaset_3_test.lua b/test/replicaset-luatest/replicaset_3_test.lua index 8016bd96..0107e7ba 100644 --- a/test/replicaset-luatest/replicaset_3_test.lua +++ b/test/replicaset-luatest/replicaset_3_test.lua @@ -269,14 +269,24 @@ test_group.test_named_replicaset = function(g) t.assert_equals(rs.id, rs.name) t.assert_equals(replica_1_a.id, replica_1_a.name) - -- Name is not set, name mismatch error. + -- Name is not set, uuid is not set, name mismatch error. 
local ret, err = rs:callrw('get_uuid', {}, {timeout = 5}) t.assert_equals(err.name, 'INSTANCE_NAME_MISMATCH') t.assert_equals(ret, nil) + local uuid_a = g.replica_1_a:instance_uuid() + -- Test, that NAME_MISMATCH error is skipped, when uuid is specified. + -- Before the name configuration, as a name cannot be dropped. New + -- replicaset in order not to rebuild it for the name configuration. + new_global_cfg.sharding['replicaset'].replicas['replica_1_a'].uuid = + g.replica_1_a:instance_uuid() + local rs_2 = vreplicaset.buildall(new_global_cfg).replicaset + ret, err = rs_2:callrw('get_uuid', {}, timeout_opts) + t.assert_equals(err, nil) + t.assert_equals(ret, uuid_a) + -- Set name, everything works from now on. g.replica_1_a:exec(function() box.cfg{instance_name = 'replica_1_a'} end) - local uuid_a = g.replica_1_a:instance_uuid() ret, err = rs:callrw('get_uuid', {}, timeout_opts) t.assert_equals(err, nil) t.assert_equals(ret, uuid_a) diff --git a/vshard/replicaset.lua b/vshard/replicaset.lua index e213fc80..ef3e7376 100644 --- a/vshard/replicaset.lua +++ b/vshard/replicaset.lua @@ -137,6 +137,7 @@ end -- local function conn_vconnect_check(conn) local vconn = conn.vconnect + local replica = conn.replica -- conn.vconnect may be nil, if connection was created on old version -- and the storage was reloaded to a new one. It's also nil, when -- all checks were already done. @@ -146,7 +147,7 @@ local function conn_vconnect_check(conn) -- Nothing to do, but wait in such case. if not vconn.future or not vconn.future:is_ready() then return nil, lerror.vshard(lerror.code.VHANDSHAKE_NOT_COMPLETE, - conn.replica.id) + replica.id) end -- Critical errors. Connection should be closed after these ones. local result, err = vconn.future:result() @@ -154,9 +155,12 @@ local function conn_vconnect_check(conn) -- Failed to get response. E.g. access error. 
return nil, lerror.make(err) end - if vconn.is_named and result[1].name ~= conn.replica.name then + -- If name is nil, it means, name was not set yet. If uuid is specified, + -- then we allow mismatch between config name and nil. + local is_name_set = result[1].name ~= nil or replica.uuid == nil + if vconn.is_named and is_name_set and result[1].name ~= replica.name then return nil, lerror.vshard(lerror.code.INSTANCE_NAME_MISMATCH, - conn.replica.name, result[1].name) + replica.name, result[1].name) end -- Don't validate until reconnect happens. conn.vconnect = nil