From 81621298add2ac6ff0439b641887614664bb740c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 25 Sep 2024 11:41:23 +0200 Subject: [PATCH 1/5] gh-124502: Add PyUnicode_Equal() function --- Doc/c-api/unicode.rst | 18 +++++++++++++ Doc/data/stable_abi.dat | 1 + Doc/whatsnew/3.14.rst | 3 +++ Include/unicodeobject.h | 4 +++ Lib/test/test_capi/test_unicode.py | 27 +++++++++++++++++++ Lib/test/test_stable_abi_ctypes.py | 1 + ...-09-25-11-44-02.gh-issue-124502.qWuDjT.rst | 2 ++ Misc/stable_abi.toml | 2 ++ Modules/_testlimitedcapi/unicode.c | 18 +++++++++++++ Objects/unicodeobject.c | 17 ++++++++++++ PC/python3dll.c | 1 + 11 files changed, 94 insertions(+) create mode 100644 Misc/NEWS.d/next/C_API/2024-09-25-11-44-02.gh-issue-124502.qWuDjT.rst diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 958fafd47ac81b..3cb9984199bbe5 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1423,6 +1423,24 @@ They all return ``NULL`` or ``-1`` if an exception occurs. This function returns ``-1`` upon failure, so one should call :c:func:`PyErr_Occurred` to check for errors. + See also :c:func:`PyUnicode_Equal`. + + +.. c:function:: int PyUnicode_Equal(PyObject *a, PyObject *b) + + Test if two strings are equal: + + * Return ``1`` if *a* is equal to *b*. + * Return ``0`` if *a* is not equal to *b*. + * Set a :exc:`TypeError` exception and return ``-1`` if *a* or *b* is not a + Python :class:`str` object. + + The function always succeed if *a* and *b* are Python :class:`str` objects. + + See also :c:func:`PyUnicode_Compare`. + + .. versionadded:: 3.14 + .. 
c:function:: int PyUnicode_EqualToUTF8AndSize(PyObject *unicode, const char *string, Py_ssize_t size) diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index 19dc71a345b474..9314facd2ad873 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -783,6 +783,7 @@ func,PyUnicode_DecodeUnicodeEscape,3.2,, func,PyUnicode_EncodeCodePage,3.7,on Windows, func,PyUnicode_EncodeFSDefault,3.2,, func,PyUnicode_EncodeLocale,3.7,, +func,PyUnicode_Equal,3.14,, func,PyUnicode_EqualToUTF8,3.13,, func,PyUnicode_EqualToUTF8AndSize,3.13,, func,PyUnicode_FSConverter,3.2,, diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 5acb9bfe18b2d0..76ad76cdbc75af 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -646,6 +646,9 @@ New Features `__ mentioned in :pep:`630` (:gh:`124153`). +* Add :c:func:`PyUnicode_Equal` function to test if two strings are equal. + (Contributed by Victor Stinner in :gh:`124502`.) + Porting to Python 3.14 ---------------------- diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index dee00715b3c51d..2ce3a008b7129e 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -966,6 +966,10 @@ PyAPI_FUNC(int) PyUnicode_EqualToUTF8(PyObject *, const char *); PyAPI_FUNC(int) PyUnicode_EqualToUTF8AndSize(PyObject *, const char *, Py_ssize_t); #endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030e0000 +PyAPI_FUNC(int) PyUnicode_Equal(PyObject *str1, PyObject *str2); +#endif + /* Rich compare two strings and return one of the following: - NULL in case an exception was raised diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index e6f85427214958..6a55c3dc5347dc 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -1903,6 +1903,33 @@ def test_recover_error(self): self.assertEqual(writer.finish(), 'Hello World.') + def test_unicode_equal(self): + unicode_equal = _testlimitedcapi.unicode_equal + + def copy(text): + 
return text.encode().decode() + + self.assertTrue(unicode_equal("", "")) + self.assertTrue(unicode_equal("abc", "abc")) + self.assertTrue(unicode_equal("abc", copy("abc"))) + self.assertTrue(unicode_equal("\u20ac", copy("\u20ac"))) + self.assertTrue(unicode_equal("\U0010ffff", copy("\U0010ffff"))) + + self.assertFalse(unicode_equal("abc", "abcd")) + self.assertFalse(unicode_equal("\u20ac", "\u20ad")) + self.assertFalse(unicode_equal("\U0010ffff", "\U0010fffe")) + + # invalid type + for invalid_type in (b'bytes', 123, ("tuple",)): + with self.subTest(invalid_type=invalid_type): + with self.assertRaises(TypeError): + unicode_equal("abc", invalid_type) + with self.assertRaises(TypeError): + unicode_equal(invalid_type, "abc") + + # CRASHES unicode_equal("abc", NULL) + # CRASHES unicode_equal(NULL, "abc") + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py index d16ad7ef5d4328..b14d500a9c6e97 100644 --- a/Lib/test/test_stable_abi_ctypes.py +++ b/Lib/test/test_stable_abi_ctypes.py @@ -805,6 +805,7 @@ def test_windows_feature_macros(self): "PyUnicode_DecodeUnicodeEscape", "PyUnicode_EncodeFSDefault", "PyUnicode_EncodeLocale", + "PyUnicode_Equal", "PyUnicode_EqualToUTF8", "PyUnicode_EqualToUTF8AndSize", "PyUnicode_FSConverter", diff --git a/Misc/NEWS.d/next/C_API/2024-09-25-11-44-02.gh-issue-124502.qWuDjT.rst b/Misc/NEWS.d/next/C_API/2024-09-25-11-44-02.gh-issue-124502.qWuDjT.rst new file mode 100644 index 00000000000000..b763a5e1aaba3f --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-09-25-11-44-02.gh-issue-124502.qWuDjT.rst @@ -0,0 +1,2 @@ +Add :c:func:`PyUnicode_Equal` function to test if two strings are equal. +Patch by Victor Stinner. 
diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index fe0a5e44f8fb15..62978261745d79 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -2536,3 +2536,5 @@ added = '3.14' [const.Py_TP_USE_SPEC] added = '3.14' +[function.PyUnicode_Equal] + added = '3.14' diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c index 2b70d09108a333..94266da6234a34 100644 --- a/Modules/_testlimitedcapi/unicode.c +++ b/Modules/_testlimitedcapi/unicode.c @@ -1837,6 +1837,23 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored)) #undef CHECK_FORMAT_0 } + +/* Test PyUnicode_Equal() */ +static PyObject * +unicode_equal(PyObject *module, PyObject *args) +{ + PyObject *str1, *str2; + if (!PyArg_ParseTuple(args, "OO", &str1, &str2)) { + return NULL; + } + + NULLABLE(str1); + NULLABLE(str2); + RETURN_INT(PyUnicode_Equal(str1, str2)); +} + + + static PyMethodDef TestMethods[] = { {"codec_incrementalencoder", codec_incrementalencoder, METH_VARARGS}, {"codec_incrementaldecoder", codec_incrementaldecoder, METH_VARARGS}, @@ -1924,6 +1941,7 @@ static PyMethodDef TestMethods[] = { {"unicode_format", unicode_format, METH_VARARGS}, {"unicode_contains", unicode_contains, METH_VARARGS}, {"unicode_isidentifier", unicode_isidentifier, METH_O}, + {"unicode_equal", unicode_equal, METH_VARARGS}, {NULL}, }; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2494c989544ca0..9d82872306ce1b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11009,6 +11009,23 @@ _PyUnicode_Equal(PyObject *str1, PyObject *str2) } +int +PyUnicode_Equal(PyObject *str1, PyObject *str2) +{ + if (!PyUnicode_Check(str1)) { + PyErr_Format(PyExc_TypeError, + "first argument must be str, not %T", str1); + return -1; + } + if (!PyUnicode_Check(str2)) { + PyErr_Format(PyExc_TypeError, + "second argument must be str, not %T", str2); + return -1; + } + return _PyUnicode_Equal(str1, str2); +} + + int PyUnicode_Compare(PyObject *left, PyObject 
*right) { diff --git a/PC/python3dll.c b/PC/python3dll.c index 6b8208ab90bd95..9296474617e115 100755 --- a/PC/python3dll.c +++ b/PC/python3dll.c @@ -717,6 +717,7 @@ EXPORT_FUNC(PyUnicode_DecodeUTF8Stateful) EXPORT_FUNC(PyUnicode_EncodeCodePage) EXPORT_FUNC(PyUnicode_EncodeFSDefault) EXPORT_FUNC(PyUnicode_EncodeLocale) +EXPORT_FUNC(PyUnicode_Equal) EXPORT_FUNC(PyUnicode_EqualToUTF8) EXPORT_FUNC(PyUnicode_EqualToUTF8AndSize) EXPORT_FUNC(PyUnicode_Find) From 4a2ae3d7b9de626d34e1af85c1a858003b2466aa Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 25 Sep 2024 13:22:37 +0200 Subject: [PATCH 2/5] Fix for regular build (not Free Threading) --- Modules/_testlimitedcapi/unicode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c index 94266da6234a34..c7a23d5d1cbd71 100644 --- a/Modules/_testlimitedcapi/unicode.c +++ b/Modules/_testlimitedcapi/unicode.c @@ -1,7 +1,7 @@ #include "pyconfig.h" // Py_GIL_DISABLED #ifndef Py_GIL_DISABLED - // Need limited C API 3.13 to test PyUnicode_EqualToUTF8() -# define Py_LIMITED_API 0x030d0000 + // Need limited C API 3.14 to test PyUnicode_Equal() +# define Py_LIMITED_API 0x030e0000 #endif #include "parts.h" From 89531920e11c07ba291521266bfe066a6d655d94 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 25 Sep 2024 15:34:02 +0200 Subject: [PATCH 3/5] Address reviews --- Doc/c-api/unicode.rst | 12 ++++++++---- Doc/whatsnew/3.14.rst | 3 ++- Lib/test/test_capi/test_unicode.py | 6 ++++++ ...24-09-25-11-44-02.gh-issue-124502.qWuDjT.rst | 4 ++-- Objects/unicodeobject.c | 17 +++++++---------- 5 files changed, 25 insertions(+), 17 deletions(-) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 3cb9984199bbe5..3d627180f47432 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1423,7 +1423,9 @@ They all return ``NULL`` or ``-1`` if an exception occurs. 
This function returns ``-1`` upon failure, so one should call :c:func:`PyErr_Occurred` to check for errors. - See also :c:func:`PyUnicode_Equal`. + .. seealso:: + + The :c:func:`PyUnicode_Equal` function. .. c:function:: int PyUnicode_Equal(PyObject *a, PyObject *b) @@ -1433,11 +1435,13 @@ They all return ``NULL`` or ``-1`` if an exception occurs. * Return ``1`` if *a* is equal to *b*. * Return ``0`` if *a* is not equal to *b*. * Set a :exc:`TypeError` exception and return ``-1`` if *a* or *b* is not a - Python :class:`str` object. + :class:`str` object. - The function always succeed if *a* and *b* are Python :class:`str` objects. + The function always succeeds if *a* and *b* are :class:`str` objects. + + .. seealso:: - See also :c:func:`PyUnicode_Compare`. + The :c:func:`PyUnicode_Compare` function. .. versionadded:: 3.14 diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 76ad76cdbc75af..b019d7ea255d30 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -646,7 +646,8 @@ New Features `__ mentioned in :pep:`630` (:gh:`124153`). -* Add :c:func:`PyUnicode_Equal` function to test if two strings are equal. +* Add :c:func:`PyUnicode_Equal` function to the limited C API: + test if two strings are equal. (Contributed by Victor Stinner in :gh:`124502`.) 
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index 6a55c3dc5347dc..65d8242ad3fc60 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -1919,6 +1919,12 @@ def copy(text): self.assertFalse(unicode_equal("\u20ac", "\u20ad")) self.assertFalse(unicode_equal("\U0010ffff", "\U0010fffe")) + # str subclass + self.assertTrue(unicode_equal("abc", Str("abc"))) + self.assertTrue(unicode_equal(Str("abc"), "abc")) + self.assertFalse(unicode_equal("abc", Str("abcd"))) + self.assertFalse(unicode_equal(Str("abc"), "abcd")) + # invalid type for invalid_type in (b'bytes', 123, ("tuple",)): with self.subTest(invalid_type=invalid_type): diff --git a/Misc/NEWS.d/next/C_API/2024-09-25-11-44-02.gh-issue-124502.qWuDjT.rst b/Misc/NEWS.d/next/C_API/2024-09-25-11-44-02.gh-issue-124502.qWuDjT.rst index b763a5e1aaba3f..f515619328b359 100644 --- a/Misc/NEWS.d/next/C_API/2024-09-25-11-44-02.gh-issue-124502.qWuDjT.rst +++ b/Misc/NEWS.d/next/C_API/2024-09-25-11-44-02.gh-issue-124502.qWuDjT.rst @@ -1,2 +1,2 @@ -Add :c:func:`PyUnicode_Equal` function to test if two strings are equal. -Patch by Victor Stinner. +Add :c:func:`PyUnicode_Equal` function to the limited C API: test if two +strings are equal. Patch by Victor Stinner. 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9d82872306ce1b..9a1c0ddcf7825f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11012,17 +11012,14 @@ _PyUnicode_Equal(PyObject *str1, PyObject *str2) int PyUnicode_Equal(PyObject *str1, PyObject *str2) { - if (!PyUnicode_Check(str1)) { - PyErr_Format(PyExc_TypeError, - "first argument must be str, not %T", str1); - return -1; - } - if (!PyUnicode_Check(str2)) { - PyErr_Format(PyExc_TypeError, - "second argument must be str, not %T", str2); - return -1; + if (PyUnicode_Check(str1) && PyUnicode_Check(str2)) { + return _PyUnicode_Equal(str1, str2); } - return _PyUnicode_Equal(str1, str2); + + PyErr_Format(PyExc_TypeError, + "Can't compare %T and %T", + str1, str2); + return -1; } From ac057c65832df5fa9c12d4fd375e8e98ffa13e83 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 25 Sep 2024 16:28:24 +0200 Subject: [PATCH 4/5] Change error message --- Objects/unicodeobject.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9a1c0ddcf7825f..dc3d5ba630af18 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11012,14 +11012,18 @@ _PyUnicode_Equal(PyObject *str1, PyObject *str2) int PyUnicode_Equal(PyObject *str1, PyObject *str2) { - if (PyUnicode_Check(str1) && PyUnicode_Check(str2)) { - return _PyUnicode_Equal(str1, str2); + if (!PyUnicode_Check(str1)) { + PyErr_Format(PyExc_TypeError, + "first argument must be str, not %T", str1); + return -1; + } + if (!PyUnicode_Check(str2)) { + PyErr_Format(PyExc_TypeError, + "second argument must be str, not %T", str2); + return -1; } - PyErr_Format(PyExc_TypeError, - "Can't compare %T and %T", - str1, str2); - return -1; + return _PyUnicode_Equal(str1, str2); } From b5b30bf08247c90e5272c631a883bf459e9a0053 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 1 Oct 2024 17:07:52 +0200 Subject: [PATCH 5/5] Mention that __eq__() is 
not honored --- Doc/c-api/unicode.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 3d627180f47432..cdfca96bcd1fed 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1439,6 +1439,9 @@ They all return ``NULL`` or ``-1`` if an exception occurs. The function always succeeds if *a* and *b* are :class:`str` objects. + The function works for :class:`str` subclasses, but does not honor a custom + ``__eq__()`` method. + .. seealso:: The :c:func:`PyUnicode_Compare` function.