From 9010aac9cf93f1b5c9061ead6662c93eed3b83a5 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Sat, 30 May 2026 20:56:47 +0200 Subject: [PATCH 1/3] gh-XXXXX: Speed up json.loads number parsing Add a fast path to _match_number_unicode for integers whose magnitude fits in an unsigned 64-bit integer (at most 19 decimal digits): accumulate the value directly into an unsigned long long instead of allocating a PyBytes and calling the generic PyLong_FromString. The result is built with PyLong_FromUnsignedLongLong and, for negative numbers, negated with PyNumber_Negative; integers with more than 19 digits fall back to the previous path. For floats and big integers, copy the (always-ASCII) number text into a stack buffer for the common short case to avoid the PyBytes allocation, and call PyOS_string_to_double directly for floats. Benchmarks (optimized free-threaded build): * pyperformance json_loads: 1.06x faster overall * microbench: small int arrays ~2x, 20-int doc 1.48x, mixed dict 1.16x All test_json tests pass. Co-Authored-By: Claude Opus 4.8 (1M context) --- Modules/_json.c | 44 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 6c4f38834631d30..39386426b6fb556 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1071,26 +1071,56 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ if (numstr == NULL) return NULL; rval = PyObject_CallOneArg(custom_func, numstr); + Py_DECREF(numstr); } else { Py_ssize_t i, n; char *buf; + + /* Fast path for integers with at most 19 digits (excluding the + optional minus sign): the magnitude always fits in an unsigned + long long, so construct the result from it directly and skip the + PyBytes allocation and the generic PyLong_FromString parser. + Integers with more digits fall back below. 
*/ + int neg = (PyUnicode_READ(kind, str, start) == '-'); + if (!is_float && idx - start - neg <= 19) { + unsigned long long value = 0; + for (i = start + neg; i < idx; i++) { + value = value * 10 + (PyUnicode_READ(kind, str, i) - '0'); + } + *next_idx_ptr = idx; + rval = PyLong_FromUnsignedLongLong(value); + if (neg && rval != NULL) { + Py_SETREF(rval, PyNumber_Negative(rval)); + } + return rval; + } + /* Straight conversion to ASCII, to avoid costly conversion of decimal unicode digits (which cannot appear here) */ n = idx - start; - numstr = PyBytes_FromStringAndSize(NULL, n); - if (numstr == NULL) - return NULL; - buf = PyBytes_AS_STRING(numstr); + char stackbuf[64]; + if (n < (Py_ssize_t)sizeof(stackbuf)) { + buf = stackbuf; + buf[n] = '\0'; + } + else { + numstr = PyBytes_FromStringAndSize(NULL, n); + if (numstr == NULL) + return NULL; + buf = PyBytes_AS_STRING(numstr); + } for (i = 0; i < n; i++) { buf[i] = (char) PyUnicode_READ(kind, str, i + start); } - if (is_float) - rval = PyFloat_FromString(numstr); + if (is_float) { + double d = PyOS_string_to_double(buf, NULL, NULL); + rval = (d == -1.0 && PyErr_Occurred()) ? 
NULL : PyFloat_FromDouble(d); + } else rval = PyLong_FromString(buf, NULL, 10); + Py_XDECREF(numstr); } - Py_DECREF(numstr); *next_idx_ptr = idx; return rval; } From b001f9628cee789f923936b5a40dbe9938ae4ae7 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Sat, 30 May 2026 21:22:48 +0000 Subject: [PATCH 2/3] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2026-05-30-21-22-38.gh-issue-150638.LWR0QQ.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2026-05-30-21-22-38.gh-issue-150638.LWR0QQ.rst diff --git a/Misc/NEWS.d/next/Library/2026-05-30-21-22-38.gh-issue-150638.LWR0QQ.rst b/Misc/NEWS.d/next/Library/2026-05-30-21-22-38.gh-issue-150638.LWR0QQ.rst new file mode 100644 index 000000000000000..c408013ce8e5326 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-05-30-21-22-38.gh-issue-150638.LWR0QQ.rst @@ -0,0 +1 @@ +Speed up :func:`json.loads` and :func:`json.load` parsing of numbers. From 17d19712ec9f742a2a27c40aca1e4c7295878e4a Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Sat, 30 May 2026 23:35:09 +0200 Subject: [PATCH 3/3] gh-XXXXX: Add tests for json.loads number parsing edge cases --- Lib/test/test_json/test_decode.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py index d846c8af7ec4345..cf6074187b6c99c 100644 --- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -148,6 +148,28 @@ def test_limit_int(self): with self.assertRaises(ValueError): self.loads('1' * (maxdigits + 1)) + def test_int_boundaries(self): + # Values around the signed/unsigned 64-bit limits and the + # 19-vs-20 digit fast-path threshold of the C accelerator. 
+ for s in ['0', '-0', + '9223372036854775807', # LLONG_MAX + '9223372036854775808', # LLONG_MAX + 1 + '-9223372036854775808', # LLONG_MIN + '-9223372036854775809', # LLONG_MIN - 1 + '9999999999999999999', # largest 19-digit + '-9999999999999999999', + '18446744073709551615', # ULLONG_MAX (20 digits) + '18446744073709551616', # ULLONG_MAX + 1 + '10000000000000000000', # smallest 20-digit + '-10000000000000000000']: + with self.subTest(s=s): + self.assertEqual(self.loads(s), int(s)) + + def test_long_float(self): + # A float longer than the C accelerator's stack buffer. + s = '0.' + '1' * 200 + self.assertEqual(self.loads(s), float(s)) + class TestPyDecode(TestDecode, PyTest): pass class TestCDecode(TestDecode, CTest): pass