Commit f3aa7c9 (parent: 3b4609b)

Provide an API to get storages initialization state

There is an issue with using CRUD functionality when not all storages are up. A new function is added that reports whether each storage is initialized, so a user can poll this state and wait for all storages to be initialized before making CRUD calls. Resolves #229
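The polling pattern the commit message describes can be written directly against the new API. Below is a minimal sketch, assuming a router instance where `crud` is already initialized; the attempt count and delay are arbitrary (the integration test at the bottom of this commit uses the same loop shape):

```lua
local fiber = require('fiber')
local crud = require('crud')

-- Poll crud.storage_info() until every storage reports "running",
-- or give up after `attempts` polls.
local function wait_for_storages(attempts, delay)
    for _ = 1, attempts do
        local info, err = crud.storage_info()
        if err == nil then
            local all_running = true
            for _, state in pairs(info) do
                if state.status ~= 'running' then
                    all_running = false
                    break
                end
            end
            if all_running then
                return true
            end
        end
        fiber.sleep(delay)
    end
    return false
end

-- Usage: wait up to ~5 seconds before making CRUD calls.
if wait_for_storages(5, 1) then
    -- safe to call crud.insert / crud.select / ...
end
```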

File tree: 7 files changed (+233, -5 lines)

CHANGELOG.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 ## [Unreleased]
 
 ### Added
+* `crud.storage_info` function to get storages status (#229).
 
 ### Changed
 
```

README.md

Lines changed: 42 additions & 0 deletions

````diff
@@ -32,6 +32,7 @@ It also provides the `crud-storage` and `crud-router` roles for
 - [Cut extra objects](#cut-extra-objects)
 - [Truncate](#truncate)
 - [Len](#len)
+- [Storage info](#storage-info)
 - [Count](#count)
 - [Call options for crud methods](#call-options-for-crud-methods)
 - [Statistics](#statistics)
@@ -1074,6 +1075,47 @@ crud.len('customers')
 ...
 ```
 
+### Storage info
+
+```lua
+-- Get storages status
+local result, err = crud.storage_info(opts)
+```
+
+where:
+
+* `opts`:
+  * `timeout` (`?number`) - maximum time (in seconds) to wait for a response
+    from cluster instances.
+
+Returns a table of storage statuses keyed by instance UUID, or `nil` with an
+error. Each status table has the following fields:
+
+* `status` - a string describing the storage state:
+  * `"running"` - the storage is initialized.
+  * `"uninitialized"` - the storage is not initialized or is disabled.
+  * `"error"` - the status could not be obtained from the remote instance
+    (a connection error, for example).
+* `is_master` - `true` if the instance is a master, `false` otherwise.
+
+**Example:**
+
+```lua
+crud.storage_info()
+```
+```
+---
+- 5c3392a3-ce89-4aec-83f3-6cb5f18e60c3:
+    status: error
+    is_master: true
+  376435fc-7871-4686-9817-75df1a093e41:
+    status: running
+    is_master: false
+  afe7f578-943f-4bd9-b636-6356760f6586:
+    status: uninitialized
+    is_master: true
+...
+```
+
 ### Count
 
 `CRUD` supports multi-conditional count, treating a cluster as a single space.
````
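Since `is_master` is part of every status record, a caller can also scope a readiness check to masters only, for example before enabling writes. A small sketch using only the fields documented above (the timeout value is arbitrary):

```lua
local info, err = crud.storage_info({timeout = 5})
assert(err == nil, err)

for uuid, state in pairs(info) do
    -- Replicas may still be catching up; require only the masters to be up.
    if state.is_master and state.status ~= 'running' then
        error(('master %s is not ready (status: %s)'):format(uuid, state.status))
    end
end
```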

crud.lua

Lines changed: 6 additions & 0 deletions

```diff
@@ -138,6 +138,10 @@ crud.stats = stats.get
 -- @function reset_stats
 crud.reset_stats = stats.reset
 
+-- @refer utils.storage_info
+-- @function storage_info
+crud.storage_info = utils.storage_info
+
 --- Initializes crud on node
 --
 -- Exports all functions that are used for calls
@@ -165,6 +169,8 @@ function crud.init_storage()
     count.init()
     borders.init()
     sharding_metadata.init()
+
+    _G._crud.is_initialized = function() return true end
 end
 
 function crud.init_router()
```
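This `_G._crud.is_initialized` flag is what `storage_info` probes: the router calls it on every replica, and on a storage that has not run `crud.init_storage()` the function does not exist, so the call fails with an "is not defined" error that `utils.storage_info` (below) maps to `"uninitialized"`. A synchronous sketch of the three outcomes; the address is hypothetical, and the real implementation performs this probe asynchronously:

```lua
local net_box = require('net.box')

-- Hypothetical storage URI, for illustration only.
local conn = net_box.connect('localhost:3301')

-- Synchronous variant of the probe utils.storage_info sends with is_async = true.
local ok, res = pcall(conn.call, conn, '_crud.is_initialized', {})

if ok and res == true then
    print('running')        -- crud.init_storage() has been called
elseif not ok and tostring(res):find(' is not defined', 1, true) then
    print('uninitialized')  -- the flag was never exported on this instance
else
    print('error')          -- e.g. a connection problem
end
```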

crud/common/call.lua

Lines changed: 4 additions & 5 deletions

```diff
@@ -5,6 +5,7 @@ local dev_checks = require('crud.common.dev_checks')
 local utils = require('crud.common.utils')
 local sharding_utils = require('crud.common.sharding.utils')
 local fiber_clock = require('fiber').clock
+local const = require('crud.common.const')
 
 local BaseIterator = require('crud.common.map_call_cases.base_iter')
 local BasePostprocessor = require('crud.common.map_call_cases.base_postprocessor')
@@ -13,8 +14,6 @@ local CallError = errors.new_class('CallError')
 
 local call = {}
 
-call.DEFAULT_VSHARD_CALL_TIMEOUT = 2
-
 function call.get_vshard_call_name(mode, prefer_replica, balance)
     dev_checks('string', '?boolean', '?boolean')
 
@@ -84,7 +83,7 @@ function call.map(func_name, func_args, opts)
         return nil, err
     end
 
-    local timeout = opts.timeout or call.DEFAULT_VSHARD_CALL_TIMEOUT
+    local timeout = opts.timeout or const.DEFAULT_VSHARD_CALL_TIMEOUT
 
     local iter = opts.iter
     if iter == nil then
@@ -149,7 +148,7 @@ function call.single(bucket_id, func_name, func_args, opts)
         return nil, err
     end
 
-    local timeout = opts.timeout or call.DEFAULT_VSHARD_CALL_TIMEOUT
+    local timeout = opts.timeout or const.DEFAULT_VSHARD_CALL_TIMEOUT
 
     local res, err = vshard.router[vshard_call_name](bucket_id, func_name, func_args, {
         timeout = timeout,
@@ -171,7 +170,7 @@ function call.any(func_name, func_args, opts)
         timeout = '?number',
     })
 
-    local timeout = opts.timeout or call.DEFAULT_VSHARD_CALL_TIMEOUT
+    local timeout = opts.timeout or const.DEFAULT_VSHARD_CALL_TIMEOUT
 
     local replicasets, err = vshard.router.routeall()
     if replicasets == nil then
```

crud/common/const.lua

Lines changed: 2 additions & 0 deletions

```diff
@@ -8,4 +8,6 @@ const.SHARDING_RELOAD_RETRIES_NUM = 1
 const.NEED_SCHEMA_RELOAD = 0x0001000
 const.NEED_SHARDING_RELOAD = 0x0001001
 
+const.DEFAULT_VSHARD_CALL_TIMEOUT = 2
+
 return const
```

crud/common/utils.lua

Lines changed: 64 additions & 0 deletions

```diff
@@ -3,6 +3,7 @@ local ffi = require('ffi')
 local vshard = require('vshard')
 local fun = require('fun')
 local bit = require('bit')
+local log = require('log')
 
 local const = require('crud.common.const')
 local schema = require('crud.common.schema')
@@ -15,6 +16,8 @@ local ShardingError = errors.new_class('ShardingError', {capture_stack = false})
 local GetSpaceFormatError = errors.new_class('GetSpaceFormatError', {capture_stack = false})
 local FilterFieldsError = errors.new_class('FilterFieldsError', {capture_stack = false})
 local NotInitializedError = errors.new_class('NotInitialized')
+local GetReplicaStateError = errors.new_class('GetStorageStateError')
+local fiber_clock = require('fiber').clock
 
 local utils = {}
 
@@ -748,4 +751,65 @@ function utils.list_slice(list, start_index, end_index)
     return slice
 end
 
+--- Polls replicas for storage state
+--
+-- @function storage_info
+--
+-- @tparam ?number opts.timeout
+--  Function call timeout
+--
+-- @return a table of storage states by replica uuid.
+function utils.storage_info(opts)
+    local replicasets, err = vshard.router.routeall()
+    if replicasets == nil then
+        return nil, GetReplicaStateError:new("Failed to get all replicasets: %s", err.err)
+    end
+
+    opts = opts or {}
+
+    local futures_by_replicas = {}
+    local replica_state_by_uuid = {}
+    local async_opts = {is_async = true}
+    local timeout = opts.timeout or const.DEFAULT_VSHARD_CALL_TIMEOUT
+
+    for _, replicaset in pairs(replicasets) do
+        for replica_uuid, replica in pairs(replicaset.replicas) do
+            replica_state_by_uuid[replica_uuid] = {status = "error",
+                                                   is_master = replicaset.master == replica}
+            local ok, res = pcall(replica.conn.call, replica.conn, "_crud.is_initialized",
+                                  {}, async_opts)
+            if ok then
+                futures_by_replicas[replica_uuid] = res
+            elseif res ~= nil then
+                log.error("Error getting storage info for %s: %s", replica_uuid, res)
+            end
+        end
+    end
+
+    local deadline = fiber_clock() + timeout
+    for replica_uuid, future in pairs(futures_by_replicas) do
+        local wait_timeout = deadline - fiber_clock()
+        if wait_timeout < 0 then
+            wait_timeout = 0
+        end
+
+        local result, err = future:wait_result(wait_timeout)
+        if result == nil then
+            future:discard()
+            if err ~= nil then
+                local str_err = tostring(err)
+                if (string.find(str_err, " is not defined") ~= nil) then
+                    replica_state_by_uuid[replica_uuid].status = "uninitialized"
+                else
+                    log.error("Error getting storage info for %s: %s", replica_uuid, err)
+                end
+            end
+        else
+            replica_state_by_uuid[replica_uuid].status = result[1] and "running" or "uninitialized"
+        end
+    end
+
+    return replica_state_by_uuid
+end
+
 return utils
```
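A note on the design: the probe is first fired at every replica with `is_async = true`, and only then are the futures awaited against a single shared deadline (`fiber_clock() + timeout`), so the wall time of `storage_info` is bounded by one `timeout` rather than by `timeout` multiplied by the number of replicas.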
Lines changed: 114 additions & 0 deletions

New test file (the whole file is added):

```lua
local fio = require('fio')

local t = require('luatest')

local helpers = require('test.helper')

local fiber = require("fiber")

local pgroup = t.group('replicas_state', {
    {engine = 'memtx'}
})

local all_storages_initialized = false

local function wait_storages_init(g)
    local storages_initialized = false
    local attempts_left = 5
    local wait_for_init_timeout = 1
    while (attempts_left > 0 and not storages_initialized) do
        local results, err = g.cluster.main_server.net_box:call("crud.storage_info", {})
        t.assert_equals(err, nil, "Error getting storage status")

        storages_initialized = true
        for _, v in pairs(results) do
            if v.status ~= "running" then
                storages_initialized = false
            end
        end

        if not storages_initialized then
            fiber.sleep(wait_for_init_timeout)
            attempts_left = attempts_left - 1
        end
    end
    return storages_initialized
end

pgroup.before_all(function(g)
    g.cluster = helpers.Cluster:new({
        datadir = fio.tempdir(),
        server_command = helpers.entrypoint('srv_select'),
        use_vshard = true,
        replicasets = helpers.get_test_replicasets(),
        env = {
            ['ENGINE'] = g.params.engine,
        },
    })
    g.cluster:start()

    -- wait for storages to initialize
    all_storages_initialized = wait_storages_init(g)
end)

pgroup.after_all(function(g)
    helpers.stop_cluster(g.cluster)
    fio.rmtree(g.cluster.datadir)
end)

pgroup.test_crud_storage_status_of_stopped_servers = function(g)
    t.assert_equals(all_storages_initialized, true)

    g.cluster:server("s2-replica"):stop()

    local results, err = g.cluster.main_server.net_box:call("crud.storage_info", {})
    t.assert_equals(err, nil, "Error getting storages states")

    local instance = results[helpers.uuid('b', 1)]
    t.assert_equals(instance.status, "running")
    t.assert_equals(instance.is_master, true)

    instance = results[helpers.uuid('b', 2)]
    t.assert_equals(instance.is_master, false)

    instance = results[helpers.uuid('c', 1)]
    t.assert_equals(instance.status, "running")
    t.assert_equals(instance.is_master, true)

    instance = results[helpers.uuid('c', 2)]
    t.assert_equals(instance.status, "error") -- peer closed
    t.assert_equals(instance.is_master, false)

    g.cluster:server("s2-replica"):start()
end

pgroup.test_disabled_storage_role = function(g)
    t.assert_equals(wait_storages_init(g), true)

    -- stop crud storage role on one replica
    local server = g.cluster:server("s1-replica")
    local results = server.net_box:eval([[
        local serviceregistry = require("cartridge.service-registry")
        serviceregistry.get("crud-storage").stop()
        return true
    ]])

    t.assert_not_equals(results, nil, "Fail to disable storage role")

    local results, err = g.cluster.main_server.net_box:call("crud.storage_info", {})
    t.assert_equals(err, nil, "Error getting storages states")

    local instance = results[helpers.uuid('b', 1)]
    t.assert_equals(instance.status, "running")
    t.assert_equals(instance.is_master, true)

    instance = results[helpers.uuid('b', 2)]
    t.assert_equals(instance.status, "uninitialized")
    t.assert_equals(instance.is_master, false)

    instance = results[helpers.uuid('c', 1)]
    t.assert_equals(instance.status, "running")
    t.assert_equals(instance.is_master, true)

    instance = results[helpers.uuid('c', 2)]
    t.assert_equals(instance.status, "running")
    t.assert_equals(instance.is_master, false)
end
```
