Skip to content

Commit 12eaee3

Browse files
committed
Add implementation of count() method
This commit introduces count method that: * has arguments and options like `select()`/`pairs()`; * counts number of rows in space with yield by `count_to_yield`; * counts by any conditions. Closes #74
1 parent 667aca6 commit 12eaee3

File tree

8 files changed

+982
-0
lines changed

8 files changed

+982
-0
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
2121
key specified with DDL schema or in `_ddl_sharding_key` space.
2222
NOTE: CRUD methods delete(), get() and update() requires that sharding key
2323
must be a part of primary key.
24+
* `crud.count()` function to calculate the number of tuples
25+
in the space according to conditions.
2426

2527
### Fixed
2628

README.md

+39
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ Table below describe what operations supports custom sharding key:
8787
| `replace()` / `replace_object()` | Yes |
8888
| `upsert()` / `upsert_object()` | Yes |
8989
| `select()` / `pairs()` | Yes |
90+
| `count()` | Yes |
9091
| `update()` | Yes |
9192
| `upsert()` / `upsert_object()` | Yes |
9293
| `replace() / replace_object()` | Yes |
@@ -586,6 +587,44 @@ crud.len('customers')
586587
...
587588
```
588589

590+
### Count
591+
592+
`CRUD` supports multi-conditional count, treating a cluster as a single space.
593+
The same as with `select()` the conditions may include field names or numbers,
594+
as well as index names. The recommended first condition is a TREE index; this
595+
helps to reduce the number of tuples to scan. Otherwise a full scan is performed.
596+
If compared with `len()`, `count()` method scans the entire space to count the
597+
tuples according user conditions.
598+
This method does yield that's why result may be approximate.
599+
600+
```lua
601+
local result, err = crud.count(space_name, conditions, opts)
602+
```
603+
604+
where:
605+
606+
* `space_name` (`string`) - name of the space
607+
* `conditions` (`?table`) - array of [conditions](#select-conditions)
608+
* `opts`:
609+
* `count_to_yield` (`?number`) - number of tuples processed to yield after,
610+
`count_to_yield` should be > 0, default value is 100
611+
* `timeout` (`?number`) - `vshard.call` timeout (in seconds), default value is 2
612+
* `bucket_id` (`?number|cdata`) - bucket ID
613+
* `mode` (`?string`, `read` or `write`) - if `write` is specified then `count` is
614+
performed on master, default value is `read`
615+
* `prefer_replica` (`?boolean`) - if `true` then the preferred target is one of
616+
the replicas, default value is `false`
617+
* `balance` (`?boolean`) - use replica according to
618+
[vshard load balancing policy](https://www.tarantool.io/en/doc/latest/reference/reference_rock/vshard/vshard_api/#router-api-call),
619+
default value is `false`
620+
621+
```lua
622+
crud.count('customers', {{'<=', 'age', 35}})
623+
---
624+
- 5
625+
...
626+
```
627+
589628
## Cartridge roles
590629

591630
`cartridge.roles.crud-storage` is a Tarantool Cartridge role that depends on the

crud.lua

+6
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ local delete = require('crud.delete')
1111
local select = require('crud.select')
1212
local truncate = require('crud.truncate')
1313
local len = require('crud.len')
14+
local count = require('crud.count')
1415
local borders = require('crud.borders')
1516
local sharding_key = require('crud.common.sharding_key')
1617
local utils = require('crud.common.utils')
@@ -76,6 +77,10 @@ crud.truncate = truncate.call
7677
-- @function len
7778
crud.len = len.call
7879

80+
-- @refer count.call
81+
-- @function count
82+
crud.count = count.call
83+
7984
-- @refer borders.min
8085
-- @function min
8186
crud.min = borders.min
@@ -113,6 +118,7 @@ function crud.init_storage()
113118
select.init()
114119
truncate.init()
115120
len.init()
121+
count.init()
116122
borders.init()
117123
sharding_key.init()
118124
end

crud/count.lua

+212
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
local checks = require('checks')
2+
local errors = require('errors')
3+
local vshard = require('vshard')
4+
local fiber = require('fiber')
5+
6+
local call = require('crud.common.call')
7+
local utils = require('crud.common.utils')
8+
local sharding = require('crud.common.sharding')
9+
local filters = require('crud.compare.filters')
10+
local count_plan = require('crud.compare.plan')
11+
local dev_checks = require('crud.common.dev_checks')
12+
local sharding_key_module = require('crud.common.sharding_key')
13+
14+
local compare_conditions = require('crud.compare.conditions')
15+
16+
local CountError = errors.new_class('CountError', {capture_stack = false})
17+
18+
local COUNT_FUNC_NAME = '_crud.count_on_storage'
19+
local DEFAULT_COUNT_TO_YIELD = 100
20+
21+
local count = {}
22+
23+
local function count_on_storage(space_name, index_id, conditions, opts)
24+
dev_checks('string', 'number', '?table', {
25+
scan_value = 'table',
26+
tarantool_iter = 'number',
27+
count_to_yield = '?number',
28+
scan_condition_num = '?number',
29+
})
30+
31+
opts = opts or {}
32+
33+
local space = box.space[space_name]
34+
35+
local index = space.index[index_id]
36+
if index == nil then
37+
return nil, CountError:new("Index with ID %s doesn't exist", index_id)
38+
end
39+
40+
local value = opts.scan_value
41+
42+
local filter_func, err = filters.gen_func(space, conditions, {
43+
tarantool_iter = opts.tarantool_iter,
44+
scan_condition_num = opts.scan_condition_num,
45+
})
46+
if err ~= nil then
47+
return nil, CountError:new("Failed to generate tuples filter: %s", err)
48+
end
49+
50+
local tuples_count = 0
51+
52+
for _, tuple in index:pairs(value, {iterator = opts.tarantool_iter}) do
53+
if tuple == nil then
54+
break
55+
end
56+
57+
local matched, early_exit = filter_func(tuple)
58+
59+
if matched then
60+
tuples_count = tuples_count + 1
61+
62+
if opts.count_to_yield ~= nil and tuples_count % opts.count_to_yield == 0 then
63+
fiber.yield()
64+
end
65+
elseif early_exit then
66+
break
67+
end
68+
end
69+
70+
return tuples_count
71+
end
72+
73+
function count.init()
74+
_G._crud.count_on_storage = count_on_storage
75+
end
76+
77+
--- Calculates the number of tuples by conditions
78+
--
79+
-- @function call
80+
--
81+
-- @param string space_name
82+
-- A space name
83+
--
84+
-- @param ?table user_conditions
85+
-- Conditions by which tuples are counted,
86+
-- default value is nil
87+
--
88+
-- @tparam ?number opts.timeout
89+
-- Function call timeout in seconds,
90+
-- default value is 2 seconds
91+
--
92+
-- @tparam ?number opts.bucket_id
93+
-- Bucket ID
94+
-- default is vshard.router.bucket_id_strcrc32 of primary key
95+
--
96+
-- @tparam ?number opts.count_to_yield
97+
-- Number of tuples processed to yield after,
98+
-- default value is 100
99+
--
100+
-- @tparam ?boolean opts.prefer_replica
101+
-- Call on replica if it's possible,
102+
-- default value is `nil`, which works as with `false`
103+
--
104+
-- @tparam ?boolean opts.balance
105+
-- Use replica according to round-robin load balancing
106+
-- default value is `nil`, which works as with `false`
107+
--
108+
-- @tparam ?string opts.mode
109+
-- vshard call mode, default value is `read`
110+
--
111+
-- @return[1] number
112+
-- @treturn[2] nil
113+
-- @treturn[2] table Error description
114+
--
115+
function count.call(space_name, user_conditions, opts)
116+
checks('string', '?table', {
117+
timeout = '?number',
118+
bucket_id = '?number|cdata',
119+
count_to_yield = '?number',
120+
prefer_replica = '?boolean',
121+
balance = '?boolean',
122+
mode = '?string',
123+
})
124+
125+
opts = opts or {}
126+
127+
if opts.count_to_yield ~= nil and opts.count_to_yield < 1 then
128+
return nil, CountError:new("count_to_yield should be > 0")
129+
end
130+
131+
-- check conditions
132+
local conditions, err = compare_conditions.parse(user_conditions)
133+
if err ~= nil then
134+
return nil, CountError:new("Failed to parse conditions: %s", err)
135+
end
136+
137+
local replicasets, err = vshard.router.routeall()
138+
if err ~= nil then
139+
return nil, CountError:new("Failed to get all replicasets: %s", err)
140+
end
141+
142+
local space = utils.get_space(space_name, replicasets)
143+
if space == nil then
144+
return nil, CountError:new("Space %q doesn't exist", space_name)
145+
end
146+
147+
local sharding_key_as_index_obj, err = sharding_key_module.fetch_on_router(space_name)
148+
if err ~= nil then
149+
return nil, err
150+
end
151+
152+
-- plan count
153+
local plan, err = count_plan.new(space, conditions, {
154+
sharding_key_as_index_obj = sharding_key_as_index_obj,
155+
})
156+
if err ~= nil then
157+
return nil, CountError:new("Failed to plan count: %s", err)
158+
end
159+
160+
local replicasets_to_count = replicasets
161+
if opts.bucket_id ~= nil or plan.sharding_key ~= nil then
162+
local bucket_id = sharding.key_get_bucket_id(plan.sharding_key, opts.bucket_id)
163+
assert(bucket_id ~= nil)
164+
165+
local err
166+
replicasets_to_count, err = sharding.get_replicasets_by_bucket_id(bucket_id)
167+
if err ~= nil then
168+
return nil, err, true
169+
end
170+
end
171+
172+
local count_to_yield = opts.count_to_yield or DEFAULT_COUNT_TO_YIELD
173+
174+
-- call `count_on_storage` on all replicasets
175+
local call_opts = {
176+
mode = opts.mode or 'read',
177+
prefer_replica = opts.prefer_replica,
178+
balance = opts.balance,
179+
timeout = opts.timeout,
180+
replicasets = replicasets_to_count,
181+
}
182+
183+
local count_opts = {
184+
scan_value = plan.scan_value,
185+
tarantool_iter = plan.tarantool_iter,
186+
count_to_yield = count_to_yield,
187+
scan_condition_num = plan.scan_condition_num,
188+
}
189+
190+
local results, err = call.map(COUNT_FUNC_NAME, {
191+
space_name, plan.index_id, plan.conditions, count_opts
192+
}, call_opts)
193+
194+
if err ~= nil then
195+
return nil, CountError:new("Failed to call count on storage-side: %s", err)
196+
end
197+
198+
if results.err ~= nil then
199+
return nil, CountError:new("Failed to call count: %s", err)
200+
end
201+
202+
local total_count = 0
203+
for _, replicaset_results in pairs(results) do
204+
if replicaset_results[1] ~= nil then
205+
total_count = total_count + replicaset_results[1]
206+
end
207+
end
208+
209+
return total_count
210+
end
211+
212+
return count

test/entrypoint/srv_select.lua

+1
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ package.preload['customers-storage'] = function()
101101
})
102102
coord_space:create_index('bucket_id', {
103103
parts = { {field = 'bucket_id'} },
104+
unique = false,
104105
if_not_exists = true,
105106
})
106107

0 commit comments

Comments
 (0)