From 030284009d7620fa8131dea752edefc9ca55e6f8 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Mon, 12 Feb 2024 14:46:39 -0800 Subject: [PATCH 1/8] Fix the API name in the doc example, improve test. This also makes the example more realistic, people are unlikely to want this API to get a value into a simple `int` type, we've got direct APIs for that. So have the example use an array as if it is a bignum of its own. I start by fixing the API name in the doc example as scoder@ noted in the original code review after merge. But I noticed one thing in the test that could be done better so I added that as well: we need to guarantee that all bytes of the result are overwritten. This now pre-fills the result with data in order to ensure that. _(assuming ctypes isn't undoing that behind the scenes...)_ --- Doc/c-api/long.rst | 10 +++++----- Lib/test/test_capi/test_long.py | 4 +++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index c39823e5e6787f..d03991602b3b34 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -358,14 +358,14 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Copy the Python integer value to a native *buffer* of size *n_bytes*:: - int value; - Py_ssize_t bytes = PyLong_CopyBits(v, &value, sizeof(value), -1); + unsigned bignum[4]; // Example size chosen by random die roll. + Py_ssize_t bytes = PyLong_AsNativeBits(v, &bignum, sizeof(bignum), -1); if (bytes < 0) { - // Error occurred + // A Python exception was set with the reason. return NULL; } - else if (bytes > sizeof(value)) { - // Overflow occurred, but 'value' contains as much as could fit + else if (bytes > sizeof(bignum)) { + // Overflow occurred, but 'bignum' contains as much as could fit. } *endianness* may be passed ``-1`` for the native endian that CPython was diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index fc82cbfa66ea7a..8d8b7a34de0906 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -510,7 +510,9 @@ def test_long_asnativebytes(self): ]: with self.subTest(f"{v:X}-{len(expect_be)}bytes"): n = len(expect_be) - buffer = bytearray(n) + # Fill the buffer with dummy data to ensure all bytes + # are overwritten. + buffer = bytearray(b'\xa5'*n) expect_le = expect_be[::-1] self.assertEqual(expect_n, asnativebytes(v, buffer, n, 0), From 509961645678ebce4021015c15ee5d38d938e9d6 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Mon, 12 Feb 2024 15:04:01 -0800 Subject: [PATCH 2/8] further test refinements. --- Lib/test/test_capi/test_long.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 8d8b7a34de0906..edef8de9d47b41 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -453,12 +453,21 @@ def test_long_asnativebytes(self): (-(2**256-1), 33), ]: with self.subTest(f"sizeof-{v:X}"): - buffer = bytearray(1) + buffer = bytearray(b"\x5a") self.assertEqual(expect, asnativebytes(v, buffer, 0, -1), - "PyLong_AsNativeBytes(v, NULL, 0, -1)") + "PyLong_AsNativeBytes(v, , 0, -1)") # Also check via the __index__ path self.assertEqual(expect, asnativebytes(Index(v), buffer, 0, -1), - "PyLong_AsNativeBytes(Index(v), NULL, 0, -1)") + "PyLong_AsNativeBytes(Index(v), , 0, -1)") + + # Test that we populate n=2 bytes but do not overwrite more. + buffer = bytearray(b"\x99"*3) + self.assertEqual(2, asnativebytes(4, buffer, 2, 0), # BE + "PyLong_AsNativeBytes(v, <3 byte buffer>, 2, 0) // BE") + self.assertEqual(buffer, b"\x00\x04\x99") + self.assertEqual(2, asnativebytes(4, buffer, 2, 1), # LE + "PyLong_AsNativeBytes(v, <3 byte buffer>, 2, 1) // LE") + self.assertEqual(buffer, b"\x04\x00\x99") # We request as many bytes as `expect_be` contains, and always check # the result (both big and little endian). We check the return value @@ -512,7 +521,7 @@ def test_long_asnativebytes(self): n = len(expect_be) # Fill the buffer with dummy data to ensure all bytes # are overwritten. - buffer = bytearray(b'\xa5'*n) + buffer = bytearray(b"\xa5"*n) expect_le = expect_be[::-1] self.assertEqual(expect_n, asnativebytes(v, buffer, n, 0), From 97292bac535713c7b5c20b1951508ba2ae25745e Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Mon, 12 Feb 2024 15:15:06 -0800 Subject: [PATCH 3/8] One more assertion against n=0 writes. --- Lib/test/test_capi/test_long.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index edef8de9d47b41..4b4d3b62f72555 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -456,9 +456,13 @@ def test_long_asnativebytes(self): buffer = bytearray(b"\x5a") self.assertEqual(expect, asnativebytes(v, buffer, 0, -1), "PyLong_AsNativeBytes(v, , 0, -1)") + self.assertEqual(buffer, b"\x5a", + "buffer overwritten when it should not have been") # Also check via the __index__ path self.assertEqual(expect, asnativebytes(Index(v), buffer, 0, -1), "PyLong_AsNativeBytes(Index(v), , 0, -1)") + self.assertEqual(buffer, b"\x5a", + "buffer overwritten when it should not have been") # Test that we populate n=2 bytes but do not overwrite more. buffer = bytearray(b"\x99"*3) From 71d527fedb296954205fefba665f81cd0a9683cc Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Fri, 16 Feb 2024 20:40:21 -0800 Subject: [PATCH 4/8] Clarify, document both use cases. --- Doc/c-api/long.rst | 71 ++++++++++++++++++++++++++------- Lib/test/test_capi/test_long.py | 7 +++- 2 files changed, 62 insertions(+), 16 deletions(-) diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 07918ce551ba52..d36d0b0eac7d6e 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -358,8 +358,8 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Copy the Python integer value to a native *buffer* of size *n_bytes*:: - unsigned bignum[4]; // Example size chosen by random die roll. - Py_ssize_t bytes = PyLong_AsNativeBits(v, &bignum, sizeof(bignum), -1); + int32_t value; + Py_ssize_t bytes = PyLong_AsNativeBits(pylong, &value, sizeof(value), -1); if (bytes < 0) { // A Python exception was set with the reason. return NULL; @@ -368,36 +368,77 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. // Success! } else { - // Overflow occurred, but 'bignum' contains a truncated value. + // Overflow occurred, but 'value' contains the truncated + // lowest bits of pylong. } + The above example may look *similar* to + :c:func:`PyLong_As* ` + but instead fills in a specific caller defined type and never raises an + error about of the :class:`int` *pylong*'s value regardless of *n_bytes* + or the returned byte count. + + To get at the entire potentially big Python value, this can be used to + reserve enough space and copy it:: + + // Ask how much space we need. + Py_ssize_t expected = PyLong_AsNativeBits(pylong, NULL, 0, -1); + if (expected < 0) { + // A Python exception was set with the reason. + return NULL; + } + assert(expected != 0); // Impossible per the API definition. + uint8_t *bignum = malloc(expected); + if (!bignum) { + PyErr_SetString(PyExc_MemoryError, "bignum malloc failed."); + return NULL; + } + // Safely get the entire value. + Py_ssize_t bytes = PyLong_AsNativeBits(pylong, bignum, expected, -1); + if (bytes < 0 || bytes > expected) { // Be safe, should not be possible. + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_RuntimeError, + "Unexpected bignum truncation after a size check."); + } + free(bignum); + return NULL; + } + // The expected success given the above pre-check. + // ... use bignum ... + free(bignum); + *endianness* may be passed ``-1`` for the native endian that CPython was compiled with, or ``0`` for big endian and ``1`` for little. - Return ``-1`` with an exception raised if *pylong* cannot be interpreted as + Returns ``-1`` with an exception raised if *pylong* cannot be interpreted as an integer. Otherwise, return the size of the buffer required to store the value. If this is equal to or less than *n_bytes*, the entire value was - copied. + copied. ``0`` will never be returned. - Unless an exception is raised, all *n_bytes* of the buffer will be written - with as much of the value as can fit. This allows the caller to ignore all - non-negative results if the intent is to match the typical behavior of a - C-style downcast. No exception is set for this case. + Unless an exception is raised, all *n_bytes* of the buffer will always be + written. In the case of truncation, as many of the lowest bits of the value + as could fit are written. This allows the caller to ignore all non-negative + results if the intent is to match the typical behavior of a C-style + downcast. No exception is set on truncation. - Values are always copied as two's-complement, and sufficient buffer will be + Values are always copied as two's-complement and sufficient buffer will be requested to include a sign bit. For example, this may cause an value that fits into 8 bytes when treated as unsigned to request 9 bytes, even though all eight bytes were copied into the buffer. What has been omitted is the - zero sign bit, which is redundant when the intention is to treat the value as - unsigned. + zero sign bit -- redundant if the caller's intention is to treat the value + as unsigned. - Passing zero to *n_bytes* will return the requested buffer size. + Passing zero to *n_bytes* will return the size of a buffer that'd at least + be large enough to hold the value. .. note:: When the value does not fit in the provided buffer, the requested size - returned from the function may be larger than necessary. Passing 0 to this - function is not an accurate way to determine the bit length of a value. + returned from the function may be larger than necessary for unsigned use + cases. Passing ``n_bytes=0`` to this function is not a way to determine + the bit length of a value, the result will merely indicate a buffer size + sufficient to hold it. When passed ``n_bytes=0`` the return value may be + larger than technically necessary. .. versionadded:: 3.13 diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 4b4d3b62f72555..9f5ee507a8eb85 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -438,7 +438,12 @@ def test_long_asnativebytes(self): if support.verbose: print(f"SIZEOF_SIZE={SZ}\n{MAX_SSIZE=:016X}\n{MAX_USIZE=:016X}") - # These tests check that the requested buffer size is correct + # These tests check that the requested buffer size is correct. + # This matches our current implementation: We only specify that the + # return value is a size *sufficient* to hold the result when queried + # using n_bytes=0. If our implementation changes, feel free to update + # the expectations here -- or loosen them to be range checks. + # (i.e. 0 *could* be stored in 1 byte and 512 in 2) for v, expect in [ (0, SZ), (512, SZ), From b04aa97c91a4a901baec692321c1717dc0866bab Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Tue, 20 Feb 2024 16:51:43 -0800 Subject: [PATCH 5/8] A little more verbose to avoid PyErr_Occurred. We don't want examples to encourage PyErr_Occurred on modern APIs. --- Doc/c-api/long.rst | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index d36d0b0eac7d6e..1fdd0280133b16 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -395,11 +395,13 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. } // Safely get the entire value. Py_ssize_t bytes = PyLong_AsNativeBits(pylong, bignum, expected, -1); - if (bytes < 0 || bytes > expected) { // Be safe, should not be possible. - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_RuntimeError, - "Unexpected bignum truncation after a size check."); - } + if (bytes < 0) { // Exception set. + free(bignum); + return NULL; + } + else if (bytes > expected) { // Be safe, should not be possible. + PyErr_SetString(PyExc_RuntimeError, + "Unexpected bignum truncation after a size check."); free(bignum); return NULL; } From cf1dd9c7ebdbc1d2abd03272ff6fc41507862926 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Tue, 20 Feb 2024 16:55:08 -0800 Subject: [PATCH 6/8] Update Doc/c-api/long.rst Co-authored-by: Steve Dower --- Doc/c-api/long.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 1fdd0280133b16..bae611c352b1a2 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -430,8 +430,9 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. zero sign bit -- redundant if the caller's intention is to treat the value as unsigned. - Passing zero to *n_bytes* will return the size of a buffer that'd at least - be large enough to hold the value. + Passing zero to *n_bytes* will return the size of a buffer that would + be large enough to hold the value. This may be larger than technically + necessary, but not unreasonably so. .. note:: From ed5b887dd54181151eed051ebaf05e90b35189cd Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Tue, 20 Feb 2024 17:01:41 -0800 Subject: [PATCH 7/8] remove redundant text that bloats the note. --- Doc/c-api/long.rst | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index bae611c352b1a2..14299501c92ab7 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -438,10 +438,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. When the value does not fit in the provided buffer, the requested size returned from the function may be larger than necessary for unsigned use - cases. Passing ``n_bytes=0`` to this function is not a way to determine - the bit length of a value, the result will merely indicate a buffer size - sufficient to hold it. When passed ``n_bytes=0`` the return value may be - larger than technically necessary. + cases. .. versionadded:: 3.13 From 937df33c19169668f203c36ebd2bf74f85ef84af Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Wed, 21 Feb 2024 19:02:12 -0800 Subject: [PATCH 8/8] Switch the sentence in the note. --- Doc/c-api/long.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 14299501c92ab7..582f5c7bf05471 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -436,9 +436,8 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. .. note:: - When the value does not fit in the provided buffer, the requested size - returned from the function may be larger than necessary for unsigned use - cases. + Passing *n_bytes=0* to this function is not an accurate way to determine + the bit length of a value. .. versionadded:: 3.13