Skip to content

Commit 03175d2

Browse files
committed
[FIX] spreadsheet: batch process spreadsheet_revision.commands
Some databases have `spreadsheet_revision` records with over 10 million characters in `commands`. If the number of such records is high, fetching them all at once leads to memory errors. We distribute the records into buckets of at most `memory_cap` in size, and use a named cursor to process them one bucket at a time. A single `commands` value larger than `memory_cap` is not split: it still fits in one bucket.
1 parent 86409e5 commit 03175d2

File tree

1 file changed

+37
-23
lines changed

1 file changed

+37
-23
lines changed

src/util/spreadsheet/misc.py

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,46 @@
1-
from .. import json
1+
from .. import json, pg
2+
3+
MEMORY_CAP = 2 * 10**8 # 200Mo
24

35

46
def iter_commands(cr, like_all=(), like_any=()):
57
if not (bool(like_all) ^ bool(like_any)):
68
raise ValueError("Please specify `like_all` or `like_any`, not both")
7-
cr.execute(
8-
"""
9-
SELECT id,
10-
commands
11-
FROM spreadsheet_revision
12-
WHERE commands LIKE {}(%s::text[])
13-
""".format("ALL" if like_all else "ANY"),
14-
[list(like_all or like_any)],
15-
)
16-
for revision_id, data in cr.fetchall():
17-
data_loaded = json.loads(data)
18-
if "commands" not in data_loaded:
19-
continue
20-
data_old = json.dumps(data_loaded, sort_keys=True)
21-
22-
changed = yield data_loaded["commands"]
23-
if changed is None:
24-
changed = data_old != json.dumps(data_loaded, sort_keys=True)
25-
26-
if changed:
27-
cr.execute(
28-
"UPDATE spreadsheet_revision SET commands=%s WHERE id=%s", [json.dumps(data_loaded), revision_id]
9+
10+
with pg.named_cursor(cr, itersize=1) as ncr:
11+
ncr.execute(
12+
"""
13+
WITH buckets AS (
14+
SELECT id,
15+
SUM(LENGTH(commands)) OVER (ORDER BY id) / {memory_cap} AS num,
16+
commands
17+
FROM spreadsheet_revision
18+
WHERE commands LIKE {condition}(%s::text[])
19+
ORDER BY id
2920
)
21+
SELECT ARRAY_AGG(id ORDER BY id),
22+
ARRAY_AGG(commands ORDER BY id)
23+
FROM buckets
24+
GROUP BY num
25+
""".format(memory_cap=MEMORY_CAP, condition="ALL" if like_all else "ANY"),
26+
[list(like_all or like_any)],
27+
)
28+
for ids, commands in ncr:
29+
for revision_id, data in zip(ids, commands):
30+
data_loaded = json.loads(data)
31+
if "commands" not in data_loaded:
32+
continue
33+
data_old = json.dumps(data_loaded, sort_keys=True)
34+
35+
changed = yield data_loaded["commands"]
36+
if changed is None:
37+
changed = data_old != json.dumps(data_loaded, sort_keys=True)
38+
39+
if changed:
40+
cr.execute(
41+
"UPDATE spreadsheet_revision SET commands=%s WHERE id=%s",
42+
[json.dumps(data_loaded), revision_id],
43+
)
3044

3145

3246
def process_commands(cr, callback, *args, **kwargs):

0 commit comments

Comments
 (0)