diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py
index 1d51fb2de0e69e4..ac3df0ed4122263 100644
--- a/Lib/test/test_json/test_decode.py
+++ b/Lib/test/test_json/test_decode.py
@@ -155,6 +155,35 @@ def test_limit_int(self):
             with self.assertRaises(ValueError):
                 self.loads('1' * (maxdigits + 1))
 
+    def test_int_boundaries(self):
+        # Values around the signed/unsigned 64-bit limits and the
+        # 19-vs-20 digit fast-path threshold of the C accelerator.
+        for s in ['0', '-0',
+                  '9223372036854775807',    # LLONG_MAX
+                  '9223372036854775808',    # LLONG_MAX + 1
+                  '-9223372036854775808',   # LLONG_MIN
+                  '-9223372036854775809',   # LLONG_MIN - 1
+                  '9999999999999999999',    # largest 19-digit
+                  '-9999999999999999999',
+                  '18446744073709551615',   # ULLONG_MAX (20 digits)
+                  '18446744073709551616',   # ULLONG_MAX + 1
+                  '10000000000000000000',   # smallest 20-digit
+                  '-10000000000000000000']:
+            with self.subTest(s=s):
+                self.assertEqual(self.loads(s), int(s))
+
+    def test_long_float(self):
+        # Floats just below, at, and above the size of the C accelerator's
+        # 64-byte stack buffer (plus a much longer one), so that both the
+        # stack and the heap-allocated conversion paths are exercised,
+        # including the off-by-one boundary between them.
+        for size in (63, 64, 65, 202):
+            for s in ('0.' + '1' * (size - 2),
+                      '-0.' + '1' * (size - 3),
+                      '1.' + '1' * (size - 6) + 'e-10'):
+                with self.subTest(s=s):
+                    self.assertEqual(self.loads(s), float(s))
+
 
 class TestPyDecode(TestDecode, PyTest): pass
 class TestCDecode(TestDecode, CTest): pass
diff --git a/Misc/NEWS.d/next/Library/2026-05-30-21-22-38.gh-issue-150638.LWR0QQ.rst b/Misc/NEWS.d/next/Library/2026-05-30-21-22-38.gh-issue-150638.LWR0QQ.rst
new file mode 100644
index 000000000000000..c408013ce8e5326
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-05-30-21-22-38.gh-issue-150638.LWR0QQ.rst
@@ -0,0 +1 @@
+Speed up :func:`json.loads` and :func:`json.load` parsing of numbers.
diff --git a/Modules/_json.c b/Modules/_json.c
index 6c4f38834631d30..39386426b6fb556 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -1071,26 +1071,72 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
         if (numstr == NULL)
             return NULL;
         rval = PyObject_CallOneArg(custom_func, numstr);
+        Py_DECREF(numstr);
     }
     else {
         Py_ssize_t i, n;
         char *buf;
+
+        /* Fast path for integers with at most 19 digits (excluding the
+           optional minus sign): the magnitude always fits in an unsigned
+           long long, so accumulate it directly and skip the PyBytes
+           allocation and the generic PyLong_FromString parser.
+           Integers with more digits fall back below. */
+        int neg = (PyUnicode_READ(kind, str, start) == '-');
+        Py_ssize_t ndigits = idx - start - neg;
+        if (!is_float && ndigits <= 19) {
+            unsigned long long value = 0;
+            for (i = start + neg; i < idx; i++) {
+                value = value * 10 + (PyUnicode_READ(kind, str, i) - '0');
+            }
+            if (ndigits <= 18) {
+                /* Below 10**18, so the magnitude also fits in a signed
+                   long long: apply the sign in C rather than allocating
+                   a second int object just to negate the first. */
+                long long svalue = (long long)value;
+                rval = PyLong_FromLongLong(neg ? -svalue : svalue);
+            }
+            else {
+                /* 19 digits may exceed the signed range; build the
+                   magnitude and negate it as a Python int when needed. */
+                rval = PyLong_FromUnsignedLongLong(value);
+                if (neg && rval != NULL) {
+                    Py_SETREF(rval, PyNumber_Negative(rval));
+                }
+            }
+            *next_idx_ptr = idx;
+            return rval;
+        }
+
         /* Straight conversion to ASCII, to avoid costly conversion of
            decimal unicode digits (which cannot appear here) */
         n = idx - start;
-        numstr = PyBytes_FromStringAndSize(NULL, n);
-        if (numstr == NULL)
-            return NULL;
-        buf = PyBytes_AS_STRING(numstr);
+        /* Numbers that fit (with their terminating NUL) in the stack buffer
+           are converted there; only longer ones pay for a PyBytes object. */
+        char stackbuf[64];
+        if (n < (Py_ssize_t)sizeof(stackbuf)) {
+            buf = stackbuf;
+            buf[n] = '\0';
+        }
+        else {
+            numstr = PyBytes_FromStringAndSize(NULL, n);
+            if (numstr == NULL)
+                return NULL;
+            buf = PyBytes_AS_STRING(numstr);
+        }
         for (i = 0; i < n; i++) {
             buf[i] = (char) PyUnicode_READ(kind, str, i + start);
         }
-        if (is_float)
-            rval = PyFloat_FromString(numstr);
+        if (is_float) {
+            /* endptr == NULL: the entire buffer must be a valid float.
+               Failure is reported as -1.0 with an exception set. */
+            double d = PyOS_string_to_double(buf, NULL, NULL);
+            rval = (d == -1.0 && PyErr_Occurred()) ? NULL : PyFloat_FromDouble(d);
+        }
         else
             rval = PyLong_FromString(buf, NULL, 10);
+        Py_XDECREF(numstr);
     }
-    Py_DECREF(numstr);
     *next_idx_ptr = idx;
     return rval;
 }