Skip to content

bpo-32037: Use the INT opcode for 32-bit integers in protocol 0 pickles. #4407

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 16, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Lib/pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,10 @@ def save_long(self, obj):
else:
self.write(LONG4 + pack("<i", n) + encoded)
return
self.write(LONG + repr(obj).encode("ascii") + b'L\n')
if -0x80000000 <= obj <= 0x7fffffff:
self.write(INT + repr(obj).encode("ascii") + b'\n')
else:
self.write(LONG + repr(obj).encode("ascii") + b'L\n')
dispatch[int] = save_long

def save_float(self, obj):
Expand Down
72 changes: 36 additions & 36 deletions Lib/pickletools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2480,35 +2480,35 @@ def __init__(self, value):
0: ( MARK
1: l LIST (MARK at 0)
2: p PUT 0
5: L LONG 1
9: a APPEND
10: L LONG 2
14: a APPEND
15: ( MARK
16: L LONG 3
20: L LONG 4
24: t TUPLE (MARK at 15)
25: p PUT 1
28: a APPEND
29: ( MARK
30: d DICT (MARK at 29)
31: p PUT 2
34: c GLOBAL '_codecs encode'
50: p PUT 3
53: ( MARK
54: V UNICODE 'abc'
59: p PUT 4
62: V UNICODE 'latin1'
70: p PUT 5
73: t TUPLE (MARK at 53)
74: p PUT 6
77: R REDUCE
78: p PUT 7
81: V UNICODE 'def'
86: p PUT 8
89: s SETITEM
90: a APPEND
91: . STOP
5: I INT 1
8: a APPEND
9: I INT 2
12: a APPEND
13: ( MARK
14: I INT 3
17: I INT 4
20: t TUPLE (MARK at 13)
21: p PUT 1
24: a APPEND
25: ( MARK
26: d DICT (MARK at 25)
27: p PUT 2
30: c GLOBAL '_codecs encode'
46: p PUT 3
49: ( MARK
50: V UNICODE 'abc'
55: p PUT 4
58: V UNICODE 'latin1'
66: p PUT 5
69: t TUPLE (MARK at 49)
70: p PUT 6
73: R REDUCE
74: p PUT 7
77: V UNICODE 'def'
82: p PUT 8
85: s SETITEM
86: a APPEND
87: . STOP
highest protocol among opcodes = 0

Try again with a "binary" pickle.
Expand Down Expand Up @@ -2577,13 +2577,13 @@ def __init__(self, value):
93: p PUT 6
96: V UNICODE 'value'
103: p PUT 7
106: L LONG 42
111: s SETITEM
112: b BUILD
113: a APPEND
114: g GET 5
117: a APPEND
118: . STOP
106: I INT 42
110: s SETITEM
111: b BUILD
112: a APPEND
113: g GET 5
116: a APPEND
117: . STOP
highest protocol among opcodes = 0

>>> dis(pickle.dumps(x, 1))
Expand Down
6 changes: 3 additions & 3 deletions Lib/test/pickletester.py
Original file line number Diff line number Diff line change
Expand Up @@ -1821,7 +1821,7 @@ def test_simple_newobj(self):
with self.subTest(proto=proto):
s = self.dumps(x, proto)
if proto < 1:
self.assertIn(b'\nL64206', s) # LONG
self.assertIn(b'\nI64206', s) # INT
else:
self.assertIn(b'M\xce\xfa', s) # BININT2
self.assertEqual(opcode_in_pickle(pickle.NEWOBJ, s),
Expand All @@ -1837,7 +1837,7 @@ def test_complex_newobj(self):
with self.subTest(proto=proto):
s = self.dumps(x, proto)
if proto < 1:
self.assertIn(b'\nL64206', s) # LONG
self.assertIn(b'\nI64206', s) # INT
elif proto < 2:
self.assertIn(b'M\xce\xfa', s) # BININT2
elif proto < 4:
Expand All @@ -1857,7 +1857,7 @@ def test_complex_newobj_ex(self):
with self.subTest(proto=proto):
s = self.dumps(x, proto)
if proto < 1:
self.assertIn(b'\nL64206', s) # LONG
self.assertIn(b'\nI64206', s) # INT
elif proto < 2:
self.assertIn(b'M\xce\xfa', s) # BININT2
elif proto < 4:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Integers that fit in a signed 32-bit integer are now pickled with
protocol 0 using the INT opcode. This decreases the size of the pickle,
speeds up pickling and unpickling, and causes these integers to be
unpickled as int instances in Python 2.
46 changes: 23 additions & 23 deletions Modules/_pickle.c
Original file line number Diff line number Diff line change
Expand Up @@ -1858,18 +1858,13 @@ save_long(PicklerObject *self, PyObject *obj)
PyObject *repr = NULL;
Py_ssize_t size;
long val;
int overflow;
int status = 0;

const char long_op = LONG;

val= PyLong_AsLong(obj);
if (val == -1 && PyErr_Occurred()) {
/* out of range for int pickling */
PyErr_Clear();
}
else if (self->bin &&
(sizeof(long) <= 4 ||
(val <= 0x7fffffffL && val >= (-0x7fffffffL - 1)))) {
val= PyLong_AsLongAndOverflow(obj, &overflow);
if (!overflow && (sizeof(long) <= 4 ||
(val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
{
/* result fits in a signed 4-byte integer.

Note: we can't use -0x80000000L in the above condition because some
Expand All @@ -1882,31 +1877,35 @@ save_long(PicklerObject *self, PyObject *obj)
char pdata[32];
Py_ssize_t len = 0;

pdata[1] = (unsigned char)(val & 0xff);
pdata[2] = (unsigned char)((val >> 8) & 0xff);
pdata[3] = (unsigned char)((val >> 16) & 0xff);
pdata[4] = (unsigned char)((val >> 24) & 0xff);

if ((pdata[4] == 0) && (pdata[3] == 0)) {
if (pdata[2] == 0) {
pdata[0] = BININT1;
len = 2;
if (self->bin) {
pdata[1] = (unsigned char)(val & 0xff);
pdata[2] = (unsigned char)((val >> 8) & 0xff);
pdata[3] = (unsigned char)((val >> 16) & 0xff);
pdata[4] = (unsigned char)((val >> 24) & 0xff);

if ((pdata[4] != 0) || (pdata[3] != 0)) {
pdata[0] = BININT;
len = 5;
}
else {
else if (pdata[2] != 0) {
pdata[0] = BININT2;
len = 3;
}
else {
pdata[0] = BININT1;
len = 2;
}
}
else {
pdata[0] = BININT;
len = 5;
sprintf(pdata, "%c%ld\n", INT, val);
len = strlen(pdata);
}

if (_Pickler_Write(self, pdata, len) < 0)
return -1;

return 0;
}
assert(!PyErr_Occurred());

if (self->proto >= 2) {
/* Linear-time pickling. */
Expand Down Expand Up @@ -1986,6 +1985,7 @@ save_long(PicklerObject *self, PyObject *obj)
goto error;
}
else {
const char long_op = LONG;
const char *string;

/* proto < 2: write the repr and newline. This is quadratic-time (in
Expand Down