Skip to content

Commit a03064b

Browse files
committed
config: verify failover and election mode consistency
See the details in the documentation request below. Fixes #9431 @TarantoolBot document Title: config: failover mode and election mode consistency `replication.failover: election` enables the RAFT-based leader election mechanism on a replicaset. The instances can be configured in the following election modes: `off`, `candidate`, `voter`, `manual`. It is controlled by the `replication.election_mode` parameter. However, the election mode parameter makes no sense and is confusing for other failover modes (`off`, `manual`, `supervised`). So, it is forbidden to set an election mode other than `off` in failover modes other than `election`. Summary: * `replication.failover: off` * `replication.election_mode: off`: OK * `replication.election_mode: candidate`: FAIL * `replication.election_mode: voter`: FAIL * `replication.election_mode: manual`: FAIL * `replication.failover: manual` * `replication.election_mode: off`: OK * `replication.election_mode: candidate`: FAIL * `replication.election_mode: voter`: FAIL * `replication.election_mode: manual`: FAIL * `replication.failover: election` * `replication.election_mode: off`: OK * `replication.election_mode: candidate`: OK * `replication.election_mode: voter`: OK * `replication.election_mode: manual`: OK * `replication.failover: supervised` * `replication.election_mode: off`: OK * `replication.election_mode: candidate`: FAIL * `replication.election_mode: voter`: FAIL * `replication.election_mode: manual`: FAIL
1 parent 74bcbc7 commit a03064b

File tree

3 files changed

+206
-0
lines changed

3 files changed

+206
-0
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
## bugfix/config
2+
3+
* Now `replication.election_mode` values other than `off` are possible only if
4+
`replication.failover` value is `election` (gh-9431).

src/box/lua/config/configdata.lua

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,38 @@ local function validate_failover(found, peers, failover, leader)
411411
leader, found.replicaset_name, found.group_name, leader), 0)
412412
end
413413
end
414+
415+
-- Verify that 'election_mode' option is set to a value other
416+
-- than 'off' only in the 'failover: election' mode.
417+
--
418+
-- The alternative would be silently ignoring the election
419+
-- mode if failover mode is not 'election'.
420+
--
421+
-- For a while, a simple and straightforward approach is
422+
-- chosen: let the user create an explicit consistent
423+
-- configuration manually.
424+
--
425+
-- We can relax it in the future, though. For example, if two
426+
-- conflicting options are set in different scopes, we can
427+
-- ignore one from the outer scope.
428+
if failover ~= 'election' then
429+
for peer_name, peer in pairs(peers) do
430+
local election_mode = instance_config:get(peer.iconfig_def,
431+
'replication.election_mode')
432+
if election_mode ~= nil and election_mode ~= 'off' then
433+
error(('replication.election_mode = %q is set for instance ' ..
434+
'%q of replicaset %q of group %q, but this option is ' ..
435+
'only applicable if replication.failover = "election"; ' ..
436+
'the replicaset is configured with replication.failover ' ..
437+
'= %q; if this particular instance requires its own ' ..
438+
'election mode, for example, if it is an anonymous ' ..
439+
'replica, consider configuring the election mode ' ..
440+
'specifically for this particular instance'):format(
441+
election_mode, peer_name, found.replicaset_name,
442+
found.group_name, failover), 0)
443+
end
444+
end
445+
end
414446
end
415447

416448
local function new(iconfig, cconfig, instance_name)
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
-- Verify all the compositions of possible replication.failover
2+
-- and replication.election_mode values.
3+
--
4+
-- If the election mode is set to a value other than null or 'off'
5+
-- and the failover mode is not 'election', the configuration is
6+
-- considered incorrect.
7+
--
8+
-- All the other cases are allowed.
9+
10+
local t = require('luatest')
11+
local cbuilder = require('test.config-luatest.cbuilder')
12+
local replicaset = require('test.config-luatest.replicaset')
13+
14+
local g = t.group()
15+
16+
g.before_all(replicaset.init)
17+
g.after_each(replicaset.drop)
18+
g.after_all(replicaset.clean)
19+
20+
-- Ease writing of a long error message in the code: join a
-- multi-line string into a single line.
--
-- Each newline is replaced with a space (not just removed),
-- otherwise the last word of a line would be glued to the first
-- word of the next line. After that, runs of spaces are collapsed
-- into a single space and the leading and trailing whitespace is
-- trimmed.
--
-- Returns exactly one value: the resulting string.
local function toline(s)
    -- Only the first result of each gsub() (the string) is used;
    -- the substitution count is dropped.
    local joined = s:gsub('\n', ' '):gsub(' +', ' ')
    -- Trim whitespace on both sides; the capture is the trimmed
    -- text (an empty string for an all-whitespace input).
    return joined:match('^%s*(.-)%s*$')
end
24+
25+
-- An error that should appear if replication.failover and
26+
-- replication.election_mode are conflicting.
27+
--
28+
-- It is a template and it should be parametrized with election mode and
29+
-- failover values: error_t:format(election_mode, failover).
30+
--
31+
-- Nuances:
32+
--
33+
-- * In general, a missed value is shown as 'nil', while a null
34+
-- value is shown as 'cdata<void *>: NULL' if there is no
35+
-- default value in the schema.
36+
-- * If replication.election_mode is missed or null, the message
37+
-- can't appear.
38+
-- * If replication.failover is missed or null, it is shown as
39+
-- 'off', because it is the default value.
40+
local error_t = toline([[
41+
replication.election_mode = %q is set for instance "instance-004" of
42+
replicaset "replicaset-001" of group "group-001", but this option is
43+
only applicable if replication.failover = "election"; the replicaset is
44+
configured with replication.failover = %q; if this particular instance
45+
requires its own election mode, for example, if it is an anonymous
46+
replica, consider configuring the election mode specifically for this
47+
particular instance
48+
]])
49+
50+
-- Build a configuration of a single replicaset with four
-- instances.
--
-- instance-001, instance-002 and instance-003 are added with no
-- instance-level options. instance-004 is added with the given
-- replication.election_mode value.
--
-- The given replication.failover value is set on the replicaset
-- level. Depending on it, one more option is set:
--
-- * 'manual': instance-001 is set as the replicaset leader.
-- * missed, null or 'off': database.mode = 'rw' is set for
--   instance-001.
--
-- Returns the result of the builder's config() method.
local function build_config(failover, election_mode)
    local cb = cbuilder.new()
    cb = cb:set_replicaset_option('replication.failover', failover)

    -- Three usual instances.
    for i = 1, 3 do
        cb = cb:add_instance(('instance-%03d'):format(i), {})
    end

    -- The instance with the specific election mode.
    cb = cb:add_instance('instance-004', {
        replication = {
            election_mode = election_mode,
        },
    })

    if failover == 'manual' then
        cb:set_replicaset_option('leader', 'instance-001')
    elseif failover == nil or failover == 'off' then
        cb:set_instance_option('instance-001', 'database.mode', 'rw')
    end

    return cb:config()
end
77+
78+
-- Produce a test case, which verifies that the given incorrect
-- configuration is reported as a startup error.
--
-- The expected error is the `error_t` template formatted with the
-- given election mode and the failover mode.
local function failure_case(failover, election_mode)
    -- The error message shows the replication.failover parameter
    -- by its default value ('off') if it is missed or null. See
    -- the comments for the `error_t` template for details.
    local reported_failover = failover
    if reported_failover == nil then
        reported_failover = 'off'
    end

    return function(g)
        -- The configuration itself is built from the failover
        -- value as it was given, not from the substituted one.
        local config = build_config(failover, election_mode)
        local expected_err = error_t:format(election_mode, reported_failover)
        replicaset.startup_error(g, config, expected_err)
    end
end
96+
97+
-- Produce a test case, which verifies that the given correct
-- configuration allows to start a replicaset successfully.
--
-- After the start, box.cfg.election_mode of instance-004 is
-- compared with the expected election mode.
local function success_case(failover, election_mode)
    return function(g)
        local rs = replicaset.new(g, build_config(failover, election_mode))
        rs:start()

        -- The values are passed to the instance as arguments of
        -- the executed function.
        rs['instance-004']:exec(function(failover, election_mode)
            local expected = election_mode
            if expected == nil then
                -- The effective default value is 'candidate' in
                -- the 'election' failover mode and 'off'
                -- otherwise.
                if failover == 'election' then
                    expected = 'candidate'
                else
                    expected = 'off'
                end
            end
            t.assert_equals(box.cfg.election_mode, expected)
        end, {failover, election_mode})
    end
end
115+
116+
-- Whether the given replication.failover value enables RAFT-based
117+
-- leader election.
118+
local failover_enables_election = {
119+
missed = false,
120+
null = false,
121+
off = false,
122+
manual = false,
123+
election = true,
124+
supervised = false,
125+
}
126+
127+
-- Whether the given replication.election_mode value requires the
128+
-- RAFT-based leader election be enabled.
129+
local election_mode_requires_election = {
130+
missed = false,
131+
null = false,
132+
off = false,
133+
voter = true,
134+
manual = true,
135+
candidate = true,
136+
}
137+
138+
-- Decode a test case parameter.
--
-- nil and box.NULL can't be used as table keys, so they're
-- encoded as the 'missed' and 'null' strings in the test case
-- parameters. This function maps 'missed' back to nil and 'null'
-- back to box.NULL. Any other value is returned as is.
local function decode_missed_and_null(v)
    local is_encoded = v == 'missed' or v == 'null'
    if not is_encoded then
        return v
    end
    if v == 'null' then
        return box.NULL
    end
    return nil
end
150+
151+
-- Glue all the code above together: define a test case for each
-- pair of replication.failover and replication.election_mode
-- values.
--
-- A failure is expected if the election mode requires the
-- RAFT-based leader election, but the failover mode doesn't enable
-- it. A successful start is expected otherwise.
for failover_str, enabled in pairs(failover_enables_election) do
    local failover = decode_missed_and_null(failover_str)

    for election_mode_str, required in pairs(election_mode_requires_election) do
        local election_mode = decode_missed_and_null(election_mode_str)

        local expect_failure = required and not enabled
        local new_case = expect_failure and failure_case or success_case

        -- The encoded ('missed'/'null') values are used in the
        -- case name, the decoded ones are passed to the case.
        local case_name = 'test_' .. table.concat({
            'failover', failover_str,
            'election_mode', election_mode_str,
            'expect', expect_failure and 'failure' or 'success',
        }, '_')

        g[case_name] = new_case(failover, election_mode)
    end
end

0 commit comments

Comments
 (0)