Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add doubled buckets issue #2276

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ Added

- Rocks versions are shown in the WebUI.

- New issue about doubled buckets (can be enabled with TARANTOOL_CHECK_DOUBLED_BUCKETS=true).

-------------------------------------------------------------------------------
[2.12.3] - 2024-08-16
-------------------------------------------------------------------------------
Expand Down
3 changes: 3 additions & 0 deletions cartridge.lua
Original file line number Diff line number Diff line change
Expand Up @@ -874,13 +874,16 @@ local function cfg(opts, box_opts)

local res, err = argparse.get_opts({
disable_unrecoverable_instances = 'boolean',
check_doubled_buckets = 'boolean',
check_doubled_buckets_period = 'number',
})

if err ~= nil then
return nil, err
end

issues.disable_unrecoverable(res.disable_unrecoverable_instances)
issues.check_doubled_buckets(res.check_doubled_buckets, res.check_doubled_buckets_period)

if opts.upload_prefix ~= nil then
local path = opts.upload_prefix
Expand Down
35 changes: 35 additions & 0 deletions cartridge/issues.lua
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
-- * various vshard alerts (see vshard docs for details);
-- * warning: "Group "..." wasn't bootstrapped: ...";
-- * warning: Vshard storages in replicaset %s marked as "all writable".
-- * warning: "Cluster has ... doubled buckets. Call require('cartridge.vshard-utils').find_doubled_buckets() for details";
-- You can enable extra vshard issues by setting
-- `TARANTOOL_ADD_VSHARD_STORAGE_ALERTS_TO_ISSUES=true/TARANTOOL_ADD_VSHARD_ROUTER_ALERTS_TO_ISSUES=true`
-- or with `--add-vshard-storage-alerts-to-issues/--add-vshard-router-alerts-to-issues` command-line argument.
Expand Down Expand Up @@ -125,6 +126,7 @@ local lua_api_proxy = require('cartridge.lua-api.proxy')
local lua_api_topology = require('cartridge.lua-api.topology')
local invalid_format = require('cartridge.invalid-format')
local sync_spaces = require('cartridge.sync-spaces')
local vshard_utils = require('cartridge.vshard-utils')

local ValidateConfigError = errors.new_class('ValidateConfigError')

Expand Down Expand Up @@ -154,6 +156,9 @@ local limits_ranges = {

vars:new('limits', default_limits)
vars:new('disable_unrecoverable', false)
vars:new('check_doubled_buckets', false)
vars:new('check_doubled_buckets_period', 24*60*60) -- 24 hours

vars:new('instance_uuid')
vars:new('replicaset_uuid')

Expand Down Expand Up @@ -565,6 +570,8 @@ local function list_on_instance(opts)
end

local disk_failure_cache = {}
-- Number of doubled buckets found by the most recent scan in list_on_cluster().
local doubled_buckets_count_cache = 0
-- fiber.time() of the most recent scan; initialized when this module is loaded.
local last_doubled_buckets_check = fiber.time()
local function list_on_cluster()
local state, err = confapplier.get_state()
if state == 'Unconfigured' and lua_api_proxy.can_call() then
Expand Down Expand Up @@ -746,6 +753,28 @@ local function list_on_cluster()
end
end

-- Doubled buckets check (disabled by default).
-- find_doubled_buckets() calls every replicaset known to the router, so
-- the scan is repeated only after `check_doubled_buckets_period` seconds
-- have passed since the previous one. In between, the cached count is
-- reported.
if vars.check_doubled_buckets == true
and last_doubled_buckets_check + vars.check_doubled_buckets_period <= fiber.time()
then
    -- find_doubled_buckets() may return `false` or `nil, err`;
    -- both are treated here as "no doubled buckets found".
    local doubled_buckets = vshard_utils.find_doubled_buckets() or {}
    local count = 0
    for _ in pairs(doubled_buckets) do
        count = count + 1
    end
    doubled_buckets_count_cache = count
    last_doubled_buckets_check = fiber.time()
end

if doubled_buckets_count_cache > 0 then
    table.insert(ret, {
        level = 'warning',
        topic = 'vshard',
        message = string.format(
            "Cluster has %d doubled buckets. " ..
            "Call require('cartridge.vshard-utils').find_doubled_buckets() for details",
            doubled_buckets_count_cache
        )
    })
end
-- Get each instance issues (replication, failover, memory usage)

local twophase_vars = require('cartridge.vars').new('cartridge.twophase')
Expand Down Expand Up @@ -859,4 +888,10 @@ return {
disable_unrecoverable = function(disable)
vars.disable_unrecoverable = disable
end,
-- Enable or disable the doubled buckets check.
-- `period` optionally overrides the interval between checks; when it is
-- nil the currently stored period is left untouched.
check_doubled_buckets = function(check, period)
    if period ~= nil then
        vars.check_doubled_buckets_period = period
    end
    vars.check_doubled_buckets = check
end,
}
45 changes: 45 additions & 0 deletions cartridge/vshard-utils.lua
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,50 @@ local function can_bootstrap_group(group_name, vsgroup)
return true
end

--- Find buckets that are reported by more than one replicaset.
-- See https://github.com/tarantool/vshard/issues/412 for details.
--
-- Calls `vshard.storage.buckets_info()` on every replicaset returned by
-- `vshard.router.routeall()` and collects the bucket ids that appear in
-- more than one answer.
--
-- NOTE(review): every bucket returned by `buckets_info` is counted
-- regardless of its status. Presumably buckets that are being transferred
-- show up on two replicasets for a while -- confirm whether such buckets
-- should be filtered out.
--
-- @function find_doubled_buckets
-- @local
-- @treturn[1] table Map `bucket_id -> {count = number, info = {...}, uuids = {...}}`
--   containing only the buckets found on more than one replicaset
-- @treturn[2] boolean `false` if the vshard-router role is not available
-- @treturn[3] nil
-- @treturn[3] table Error returned by `replicaset:callro`
local function find_doubled_buckets()
    if roles.get_role('vshard-router') == nil then
        return false
    end
    local vshard = require('vshard')

    -- Entries are created lazily, only for the buckets that were actually
    -- reported. This avoids allocating a table per bucket of the whole
    -- cluster and tolerates unexpected bucket ids.
    local seen = {}
    for _, replicaset in pairs(vshard.router.routeall()) do
        local buckets, err = replicaset:callro(
            'vshard.storage.buckets_info', {}, {timeout = 5}
        )
        if err ~= nil or buckets == nil then
            return nil, err
        end

        for id, bucket in pairs(buckets) do
            local entry = seen[id]
            if entry == nil then
                entry = {count = 0, info = {}, uuids = {}}
                seen[id] = entry
            end
            entry.count = entry.count + 1
            table.insert(entry.uuids, replicaset.uuid)
            table.insert(entry.info, bucket)
        end
    end

    local doubled = {}
    for id, entry in pairs(seen) do
        if entry.count > 1 then
            doubled[id] = entry
        end
    end

    return doubled
end


local function can_bootstrap()
if roles.get_role('vshard-router') == nil then
return false
Expand Down Expand Up @@ -764,6 +808,7 @@ return {
can_bootstrap = can_bootstrap,
edit_vshard_options = edit_vshard_options,
patch_zone_distances = patch_zone_distances,
find_doubled_buckets = find_doubled_buckets,

init = init,
}
8 changes: 8 additions & 0 deletions rst/cartridge_admin.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1512,6 +1512,14 @@ Cartridge displays cluster and instances issues in WebUI:
* **warning**: "Vshard storages in replicaset ... marked as "all writable".
You can fix it by setting ``all_rw = false`` in the replicaset configuration;

* **warning**: "Cluster has ... doubled buckets. Call require('cartridge.vshard-utils').find_doubled_buckets() for details"
-- call ``require('cartridge.vshard-utils').find_doubled_buckets()`` to get more info,
then remove all duplicated data manually and use ``vshard.storage.bucket_force_drop(bucket_id)``
to remove the bucket. See https://github.com/tarantool/vshard/issues/412 for details.
This issue is disabled by default. You can enable it by setting
``TARANTOOL_CHECK_DOUBLED_BUCKETS=true``; the check then runs once every
``TARANTOOL_CHECK_DOUBLED_BUCKETS_PERIOD`` seconds (default is 24 hours);

You can enable extra vshard issues by setting
``TARANTOOL_ADD_VSHARD_STORAGE_ALERTS_TO_ISSUES=true/TARANTOOL_ADD_VSHARD_ROUTER_ALERTS_TO_ISSUES=true``
or with ``--add-vshard-storage-alerts-to-issues/--add-vshard-router-alerts-to-issues`` command-line argument.
Expand Down
72 changes: 72 additions & 0 deletions test/integration/vshard_doubled_buckets_test.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
local fio = require('fio')
local t = require('luatest')
local netbox = require('net.box')
local g = t.group()

local helpers = require('test.helper')

-- Start a cluster of one router and two single-instance storages with the
-- doubled buckets check enabled and its period set to 10.
g.before_all = function()
    -- Every replicaset in this test consists of exactly one server.
    local function single_instance(alias, role)
        return {
            alias = alias,
            roles = {role},
            servers = 1,
        }
    end

    local cluster_opts = {
        datadir = fio.tempdir(),
        server_command = helpers.entrypoint('srv_basic'),
        cookie = helpers.random_cookie(),
        use_vshard = true,
        replicasets = {
            single_instance('router', 'vshard-router'),
            single_instance('storage-1', 'vshard-storage'),
            single_instance('storage-2', 'vshard-storage'),
        },
        env = {
            TARANTOOL_CHECK_DOUBLED_BUCKETS = 'true',
            TARANTOOL_CHECK_DOUBLED_BUCKETS_PERIOD = '10',
        },
    }

    g.cluster = helpers.Cluster:new(cluster_opts)
    g.cluster:start()
end

-- Stop the cluster and remove its data directory.
g.after_all = function()
    local cluster = g.cluster
    cluster:stop()
    fio.rmtree(cluster.datadir)
end

-- Check that the "doubled buckets" issue appears when the same bucket is
-- present on two storages and disappears once the duplicate is removed.
function g.test_doubled_buckets()
    local bucket = g.cluster:server('storage-2-1'):exec(function()
        return box.space._bucket:select(nil, {limit = 1})[1]
    end)
    t.assert(bucket, 'storage-2-1 is expected to own at least one bucket')

    -- Copy the bucket record to the other storage bypassing the space
    -- triggers, so that both storages report the same bucket id.
    g.cluster:server('storage-1-1'):exec(function(bucket)
        box.space._bucket:run_triggers(false)
        return box.space._bucket:insert(bucket)
    end, {bucket})

    t.helpers.retrying({timeout = 20}, function()
        t.assert_covers(helpers.list_cluster_issues(g.cluster.main_server), {
            {
                level = 'warning',
                topic = 'vshard',
                message = "Cluster has 1 doubled buckets. " ..
                    "Call require('cartridge.vshard-utils').find_doubled_buckets() for details",
            },
        })
    end)

    -- Remove the duplicate and restore the triggers disabled above.
    g.cluster:server('storage-1-1'):exec(function(bucket)
        box.space._bucket:delete(bucket[1])
        box.space._bucket:run_triggers(true)
    end, {bucket})

    -- assert_covers(issues, {}) passes for any list of issues, so check
    -- explicitly that the doubled buckets warning is gone.
    t.helpers.retrying({timeout = 20}, function()
        local issues = helpers.list_cluster_issues(g.cluster.main_server)
        for _, issue in ipairs(issues) do
            t.assert_not_str_contains(issue.message, 'doubled buckets')
        end
    end)
end
Loading