From 250c3e1e9965e4b0359d82e64cf52bf83a266ec3 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 14 Nov 2023 18:07:09 +0100 Subject: [PATCH 1/6] gh-111545: Add Py_HashDouble() function * Add again the private _PyHASH_NAN constant. * Add tests: Modules/_testcapi/hash.c and Lib/test/test_capi/test_hash.py. --- Doc/c-api/hash.rst | 26 +++++++++++ Doc/library/sys.rst | 8 +++- Doc/whatsnew/3.13.rst | 3 ++ Include/cpython/pyhash.h | 2 + Lib/test/test_capi/test_hash.py | 44 +++++++++++++++++++ ...-12-06-15-32-30.gh-issue-111545.kSOygi.rst | 2 + Modules/_testcapi/hash.c | 29 +++++++++++- Python/pyhash.c | 32 +++++++++++--- Python/sysmodule.c | 2 +- 9 files changed, 137 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2023-12-06-15-32-30.gh-issue-111545.kSOygi.rst diff --git a/Doc/c-api/hash.rst b/Doc/c-api/hash.rst index 91d88ae27bc9f4..8cc2bef580627f 100644 --- a/Doc/c-api/hash.rst +++ b/Doc/c-api/hash.rst @@ -5,12 +5,16 @@ PyHash API See also the :c:member:`PyTypeObject.tp_hash` member. +Types +^^^^^ + .. c:type:: Py_hash_t Hash value type: signed integer. .. versionadded:: 3.2 + .. c:type:: Py_uhash_t Hash value type: unsigned integer. @@ -41,6 +45,28 @@ See also the :c:member:`PyTypeObject.tp_hash` member. .. versionadded:: 3.4 +Functions +^^^^^^^^^ + +.. c:function:: int Py_HashDouble(double value, Py_hash_t *result) + + Hash a C double number. + + * Set *\*result* to the hash and return ``1`` if *value* is finite or is + infinity. + * Set *\*result* to :data:`sys.hash_info.nan ` (``0``) and + return ``0`` if *value* is not-a-number (NaN). + + *result* must not be ``NULL``. + + .. note:: + Only rely on the function return value to distinguish the "not-a-number" + case. *\*result* can be ``0`` if *value* is finite. For example, + ``Py_HashDouble(0.0, &result)`` sets *\*result* to 0. + + .. versionadded:: 3.13 + + .. c:function:: PyHash_FuncDef* PyHash_GetFuncDef(void) Get the hash function definition. diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index aaf79205d44282..53c8e68108b0ac 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -1034,7 +1034,13 @@ always available. .. attribute:: hash_info.nan - (This attribute is no longer used) + The hash value returned for not-a-number (NaN). + + This hash value is only used by the :c:func:`Py_HashDouble` C function if + the argument is not-a-number (NaN). + + .. versionchanged:: 3.10 + This hash value is no longer used to hash numbers in Python. .. attribute:: hash_info.imag diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index d599ba9ae6fac8..bcf41c0b29815c 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1274,6 +1274,9 @@ New Features * Add :c:func:`Py_HashPointer` function to hash a pointer. (Contributed by Victor Stinner in :gh:`111545`.) +* Add :c:func:`Py_HashDouble` function to hash a C double number. + (Contributed by Victor Stinner in :gh:`111545`.) + Porting to Python 3.13 ---------------------- diff --git a/Include/cpython/pyhash.h b/Include/cpython/pyhash.h index 396c208e1b106a..426e6e418ebe87 100644 --- a/Include/cpython/pyhash.h +++ b/Include/cpython/pyhash.h @@ -17,6 +17,7 @@ #define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1) #define _PyHASH_INF 314159 +#define _PyHASH_NAN 0 #define _PyHASH_IMAG _PyHASH_MULTIPLIER /* Helpers for hash functions */ @@ -37,3 +38,4 @@ typedef struct { PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void); PyAPI_FUNC(Py_hash_t) Py_HashPointer(const void *ptr); +PyAPI_FUNC(int) Py_HashDouble(double value, Py_hash_t *result); diff --git a/Lib/test/test_capi/test_hash.py b/Lib/test/test_capi/test_hash.py index 8436da7c32df10..d53b6afd75d2f3 100644 --- a/Lib/test/test_capi/test_hash.py +++ b/Lib/test/test_capi/test_hash.py @@ -1,3 +1,4 @@ +import math import sys import unittest from test.support import import_helper @@ -77,3 +78,46 @@ def python_hash_pointer(x): # Py_HashPointer((void*)(uintptr_t)-1) doesn't return -1 but -2 VOID_P_MAX = -1 & (2 ** (8 * SIZEOF_VOID_P) - 1) self.assertEqual(hash_pointer(VOID_P_MAX), -2) + + def test_hash_double(self): + # Test Py_HashDouble() + hash_double = _testcapi.hash_double + + def check_number(value, expected): + self.assertEqual(hash_double(value), (1, expected)) + + # test some integers + integers = [ + *range(1, 30), + 2**30 - 1, + 2 ** 233, + int(sys.float_info.max), + ] + for x in integers: + with self.subTest(x=x): + check_number(float(x), hash(x)) + check_number(float(-x), hash(-x)) + + # test positive and negative zeros + check_number(float(0.0), 0) + check_number(float(-0.0), 0) + + # test +inf and -inf + inf = float("inf") + check_number(inf, sys.hash_info.inf) + check_number(-inf, -sys.hash_info.inf) + + # special float values: compare with Python hash() function + special_values = ( + math.nextafter(0.0, 1.0), # smallest positive subnormal number + sys.float_info.min, # smallest positive normal number + sys.float_info.epsilon, + sys.float_info.max, # largest positive finite number + ) + for x in special_values: + with self.subTest(x=x): + check_number(x, hash(x)) + check_number(-x, hash(-x)) + + # test not-a-number (NaN) + self.assertEqual(hash_double(float('nan')), (0, sys.hash_info.nan)) diff --git a/Misc/NEWS.d/next/C API/2023-12-06-15-32-30.gh-issue-111545.kSOygi.rst b/Misc/NEWS.d/next/C API/2023-12-06-15-32-30.gh-issue-111545.kSOygi.rst new file mode 100644 index 00000000000000..b6f1db895a3523 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-12-06-15-32-30.gh-issue-111545.kSOygi.rst @@ -0,0 +1,2 @@ +Add :c:func:`Py_HashDouble` function to hash a C double number. Patch by +Victor Stinner. diff --git a/Modules/_testcapi/hash.c b/Modules/_testcapi/hash.c index aee76787dcddb3..cadb45a61b9b4d 100644 --- a/Modules/_testcapi/hash.c +++ b/Modules/_testcapi/hash.c @@ -1,6 +1,17 @@ #include "parts.h" #include "util.h" + +static PyObject * +long_from_hash(Py_hash_t hash) +{ + assert(hash != -1); + + Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash)); + return PyLong_FromLongLong(hash); +} + + static PyObject * hash_getfuncdef(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) { @@ -54,14 +65,28 @@ hash_pointer(PyObject *Py_UNUSED(module), PyObject *arg) } Py_hash_t hash = Py_HashPointer(ptr); - Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash)); - return PyLong_FromLongLong(hash); + return long_from_hash(hash); +} + + +static PyObject * +hash_double(PyObject *Py_UNUSED(module), PyObject *args) +{ + double value; + if (!PyArg_ParseTuple(args, "d", &value)) { + return NULL; + } + + Py_hash_t hash; + int res = Py_HashDouble(value, &hash); + return Py_BuildValue("iN", res, long_from_hash(hash)); } static PyMethodDef test_methods[] = { {"hash_getfuncdef", hash_getfuncdef, METH_NOARGS}, {"hash_pointer", hash_pointer, METH_O}, + {"hash_double", hash_double, METH_VARARGS}, {NULL}, }; diff --git a/Python/pyhash.c b/Python/pyhash.c index 141407c265677a..0c85a736a0da54 100644 --- a/Python/pyhash.c +++ b/Python/pyhash.c @@ -83,18 +83,23 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0}; */ -Py_hash_t -_Py_HashDouble(PyObject *inst, double v) +int +Py_HashDouble(double v, Py_hash_t *result) { int e, sign; double m; Py_uhash_t x, y; if (!Py_IS_FINITE(v)) { - if (Py_IS_INFINITY(v)) - return v > 0 ? _PyHASH_INF : -_PyHASH_INF; - else - return _Py_HashPointer(inst); + if (Py_IS_INFINITY(v)) { + *result = (v > 0 ? _PyHASH_INF : -_PyHASH_INF); + return 1; + } + else { + assert(Py_IS_NAN(v)); + *result = _PyHASH_NAN; + return 0; + } } m = frexp(v, &e); @@ -126,7 +131,20 @@ _Py_HashDouble(PyObject *inst, double v) x = x * sign; if (x == (Py_uhash_t)-1) x = (Py_uhash_t)-2; - return (Py_hash_t)x; + *result = (Py_hash_t)x; + return 1; +} + +Py_hash_t +_Py_HashDouble(PyObject *obj, double v) +{ + assert(obj != NULL); + + Py_hash_t hash; + if (Py_HashDouble(v, &hash) == 0) { + hash = Py_HashPointer(obj); + } + return hash; } Py_hash_t diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 57dc4a1226ce75..cefe0e965a5519 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1508,7 +1508,7 @@ get_hash_info(PyThreadState *tstate) PyStructSequence_SET_ITEM(hash_info, field++, PyLong_FromLong(_PyHASH_INF)); PyStructSequence_SET_ITEM(hash_info, field++, - PyLong_FromLong(0)); // This is no longer used + PyLong_FromLong(_PyHASH_NAN)); PyStructSequence_SET_ITEM(hash_info, field++, PyLong_FromLong(_PyHASH_IMAG)); PyStructSequence_SET_ITEM(hash_info, field++, From dd19138a11852a824206848b2a4bca744ef76904 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 12 Dec 2023 15:33:58 +0100 Subject: [PATCH 2/6] Update --- Doc/c-api/hash.rst | 7 +++---- Doc/library/sys.rst | 8 +------- Include/cpython/pyhash.h | 1 - Python/pyhash.c | 2 +- Python/sysmodule.c | 2 +- 5 files changed, 6 insertions(+), 14 deletions(-) diff --git a/Doc/c-api/hash.rst b/Doc/c-api/hash.rst index 8cc2bef580627f..95f5f260509e46 100644 --- a/Doc/c-api/hash.rst +++ b/Doc/c-api/hash.rst @@ -52,10 +52,9 @@ Functions Hash a C double number. - * Set *\*result* to the hash and return ``1`` if *value* is finite or is - infinity. - * Set *\*result* to :data:`sys.hash_info.nan ` (``0``) and - return ``0`` if *value* is not-a-number (NaN). + * Set *\*result* to the hash value and return ``1`` on success. + * Set *\*result* to ``0`` and return ``0`` if the hash value cannot be + calculated. For example, if value is not-a-number (NaN). *result* must not be ``NULL``. diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 53c8e68108b0ac..aaf79205d44282 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -1034,13 +1034,7 @@ always available. .. attribute:: hash_info.nan - The hash value returned for not-a-number (NaN). - - This hash value is only used by the :c:func:`Py_HashDouble` C function if - the argument is not-a-number (NaN). - - .. versionchanged:: 3.10 - This hash value is no longer used to hash numbers in Python. + (This attribute is no longer used) .. attribute:: hash_info.imag diff --git a/Include/cpython/pyhash.h b/Include/cpython/pyhash.h index 426e6e418ebe87..f495723cc99d9a 100644 --- a/Include/cpython/pyhash.h +++ b/Include/cpython/pyhash.h @@ -17,7 +17,6 @@ #define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1) #define _PyHASH_INF 314159 -#define _PyHASH_NAN 0 #define _PyHASH_IMAG _PyHASH_MULTIPLIER /* Helpers for hash functions */ diff --git a/Python/pyhash.c b/Python/pyhash.c index 0c85a736a0da54..a9ea6cd81e9f2a 100644 --- a/Python/pyhash.c +++ b/Python/pyhash.c @@ -97,7 +97,7 @@ Py_HashDouble(double v, Py_hash_t *result) } else { assert(Py_IS_NAN(v)); - *result = _PyHASH_NAN; + *result = 0; return 0; } } diff --git a/Python/sysmodule.c b/Python/sysmodule.c index cefe0e965a5519..57dc4a1226ce75 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1508,7 +1508,7 @@ get_hash_info(PyThreadState *tstate) PyStructSequence_SET_ITEM(hash_info, field++, PyLong_FromLong(_PyHASH_INF)); PyStructSequence_SET_ITEM(hash_info, field++, - PyLong_FromLong(_PyHASH_NAN)); + PyLong_FromLong(0)); // This is no longer used PyStructSequence_SET_ITEM(hash_info, field++, PyLong_FromLong(_PyHASH_IMAG)); PyStructSequence_SET_ITEM(hash_info, field++, From 44d3bd949c25adc25248f55f65a8e6281c95e5a0 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 12 Dec 2023 15:41:26 +0100 Subject: [PATCH 3/6] Update Doc/c-api/hash.rst --- Doc/c-api/hash.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/c-api/hash.rst b/Doc/c-api/hash.rst index 95f5f260509e46..5e6cbcc3826140 100644 --- a/Doc/c-api/hash.rst +++ b/Doc/c-api/hash.rst @@ -54,7 +54,7 @@ Functions * Set *\*result* to the hash value and return ``1`` on success. * Set *\*result* to ``0`` and return ``0`` if the hash value cannot be - calculated. For example, if value is not-a-number (NaN). + calculated. For example, if *value* is not-a-number (NaN). *result* must not be ``NULL``. From 6df01835cea9e095019e554a268da8d8fda33b1f Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 12 Dec 2023 15:43:42 +0100 Subject: [PATCH 4/6] Update Lib/test/test_capi/test_hash.py --- Lib/test/test_capi/test_hash.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_capi/test_hash.py b/Lib/test/test_capi/test_hash.py index d53b6afd75d2f3..5d20f61f202718 100644 --- a/Lib/test/test_capi/test_hash.py +++ b/Lib/test/test_capi/test_hash.py @@ -120,4 +120,4 @@ def check_number(value, expected): check_number(-x, hash(-x)) # test not-a-number (NaN) - self.assertEqual(hash_double(float('nan')), (0, sys.hash_info.nan)) + self.assertEqual(hash_double(float('nan')), (0, 0)) From 62ecc19a5d1c7b2226dca99842fac8bfaebb998c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 12 Dec 2023 15:47:07 +0100 Subject: [PATCH 5/6] Update Python/pyhash.c --- Python/pyhash.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/pyhash.c b/Python/pyhash.c index a9ea6cd81e9f2a..a59899084f4a78 100644 --- a/Python/pyhash.c +++ b/Python/pyhash.c @@ -136,12 +136,12 @@ Py_HashDouble(double v, Py_hash_t *result) } Py_hash_t -_Py_HashDouble(PyObject *obj, double v) +_Py_HashDouble(PyObject *obj, double value) { assert(obj != NULL); Py_hash_t hash; - if (Py_HashDouble(v, &hash) == 0) { + if (Py_HashDouble(value, &hash) == 0) { hash = Py_HashPointer(obj); } return hash; From 95a8f09d8be1c09adb2dbf0d2c2b0811db9eb569 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 12 Dec 2023 16:55:36 +0100 Subject: [PATCH 6/6] Suggest how to replace private _Py_HashDouble() --- Doc/whatsnew/3.13.rst | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index bcf41c0b29815c..832536eb9e89c1 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1274,7 +1274,19 @@ New Features * Add :c:func:`Py_HashPointer` function to hash a pointer. (Contributed by Victor Stinner in :gh:`111545`.) -* Add :c:func:`Py_HashDouble` function to hash a C double number. +* Add :c:func:`Py_HashDouble` function to hash a C double number. Existing code + using the private ``_Py_HashDouble()`` function can be updated to:: + + Py_hash_t + hash_double(PyObject *obj, double value) + { + Py_hash_t hash; + if (Py_HashDouble(value, &hash) == 0) { + hash = Py_HashPointer(obj); + } + return hash; + } + (Contributed by Victor Stinner in :gh:`111545`.)