From a7f0727ca575fef4d8891b5ebfe71ef2a774868b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 7 Oct 2024 23:24:53 +0200 Subject: [PATCH] gh-124502: Add PyUnicode_Equal() function (#124504) --- Doc/c-api/unicode.rst | 25 ++++++++++++++ Doc/data/stable_abi.dat | 1 + Doc/whatsnew/3.14.rst | 4 +++ Include/unicodeobject.h | 4 +++ Lib/test/test_capi/test_unicode.py | 33 +++++++++++++++++++ Lib/test/test_stable_abi_ctypes.py | 1 + ...-09-25-11-44-02.gh-issue-124502.qWuDjT.rst | 2 ++ Misc/stable_abi.toml | 2 ++ Modules/_testlimitedcapi/unicode.c | 22 +++++++++++-- Objects/unicodeobject.c | 18 ++++++++++ PC/python3dll.c | 1 + 11 files changed, 111 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2024-09-25-11-44-02.gh-issue-124502.qWuDjT.rst diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index b2ac0c903c2bd7..f5704cffa199a5 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1438,6 +1438,31 @@ They all return ``NULL`` or ``-1`` if an exception occurs. This function returns ``-1`` upon failure, so one should call :c:func:`PyErr_Occurred` to check for errors. + .. seealso:: + + The :c:func:`PyUnicode_Equal` function. + + +.. c:function:: int PyUnicode_Equal(PyObject *a, PyObject *b) + + Test if two strings are equal: + + * Return ``1`` if *a* is equal to *b*. + * Return ``0`` if *a* is not equal to *b*. + * Set a :exc:`TypeError` exception and return ``-1`` if *a* or *b* is not a + :class:`str` object. + + The function always succeeds if *a* and *b* are :class:`str` objects. + + The function works for :class:`str` subclasses, but does not honor a custom + ``__eq__()`` method. + + .. seealso:: + + The :c:func:`PyUnicode_Compare` function. + + .. versionadded:: 3.14 + .. 
c:function:: int PyUnicode_EqualToUTF8AndSize(PyObject *unicode, const char *string, Py_ssize_t size) diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index 19dc71a345b474..9314facd2ad873 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -783,6 +783,7 @@ func,PyUnicode_DecodeUnicodeEscape,3.2,, func,PyUnicode_EncodeCodePage,3.7,on Windows, func,PyUnicode_EncodeFSDefault,3.2,, func,PyUnicode_EncodeLocale,3.7,, +func,PyUnicode_Equal,3.14,, func,PyUnicode_EqualToUTF8,3.13,, func,PyUnicode_EqualToUTF8AndSize,3.13,, func,PyUnicode_FSConverter,3.2,, diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 67d8d389b58082..f1f78ed843f313 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -687,6 +687,10 @@ New Features <https://peps.python.org/pep-0630/#type-checking>`__ mentioned in :pep:`630` (:gh:`124153`). +* Add :c:func:`PyUnicode_Equal` function to the limited C API: + test if two strings are equal. + (Contributed by Victor Stinner in :gh:`124502`.) + Porting to Python 3.14 ---------------------- diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index dee00715b3c51d..2ce3a008b7129e 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -966,6 +966,10 @@ PyAPI_FUNC(int) PyUnicode_EqualToUTF8(PyObject *, const char *); PyAPI_FUNC(int) PyUnicode_EqualToUTF8AndSize(PyObject *, const char *, Py_ssize_t); #endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030e0000 +PyAPI_FUNC(int) PyUnicode_Equal(PyObject *str1, PyObject *str2); +#endif + /* Rich compare two strings and return one of the following: - NULL in case an exception was raised diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index e6f85427214958..65d8242ad3fc60 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -1903,6 +1903,39 @@ def test_recover_error(self): self.assertEqual(writer.finish(), 'Hello World.') + def test_unicode_equal(self): + unicode_equal = _testlimitedcapi.unicode_equal + 
def copy(text): + return text.encode().decode() + + self.assertTrue(unicode_equal("", "")) + self.assertTrue(unicode_equal("abc", "abc")) + self.assertTrue(unicode_equal("abc", copy("abc"))) + self.assertTrue(unicode_equal("\u20ac", copy("\u20ac"))) + self.assertTrue(unicode_equal("\U0010ffff", copy("\U0010ffff"))) + + self.assertFalse(unicode_equal("abc", "abcd")) + self.assertFalse(unicode_equal("\u20ac", "\u20ad")) + self.assertFalse(unicode_equal("\U0010ffff", "\U0010fffe")) + + # str subclass + self.assertTrue(unicode_equal("abc", Str("abc"))) + self.assertTrue(unicode_equal(Str("abc"), "abc")) + self.assertFalse(unicode_equal("abc", Str("abcd"))) + self.assertFalse(unicode_equal(Str("abc"), "abcd")) + + # invalid type + for invalid_type in (b'bytes', 123, ("tuple",)): + with self.subTest(invalid_type=invalid_type): + with self.assertRaises(TypeError): + unicode_equal("abc", invalid_type) + with self.assertRaises(TypeError): + unicode_equal(invalid_type, "abc") + + # CRASHES unicode_equal("abc", NULL) + # CRASHES unicode_equal(NULL, "abc") + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py index d16ad7ef5d4328..b14d500a9c6e97 100644 --- a/Lib/test/test_stable_abi_ctypes.py +++ b/Lib/test/test_stable_abi_ctypes.py @@ -805,6 +805,7 @@ def test_windows_feature_macros(self): "PyUnicode_DecodeUnicodeEscape", "PyUnicode_EncodeFSDefault", "PyUnicode_EncodeLocale", + "PyUnicode_Equal", "PyUnicode_EqualToUTF8", "PyUnicode_EqualToUTF8AndSize", "PyUnicode_FSConverter", diff --git a/Misc/NEWS.d/next/C_API/2024-09-25-11-44-02.gh-issue-124502.qWuDjT.rst b/Misc/NEWS.d/next/C_API/2024-09-25-11-44-02.gh-issue-124502.qWuDjT.rst new file mode 100644 index 00000000000000..f515619328b359 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-09-25-11-44-02.gh-issue-124502.qWuDjT.rst @@ -0,0 +1,2 @@ +Add :c:func:`PyUnicode_Equal` function to the limited C API: test if two +strings are equal. 
Patch by Victor Stinner. diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index fe0a5e44f8fb15..62978261745d79 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -2536,3 +2536,5 @@ added = '3.14' [const.Py_TP_USE_SPEC] added = '3.14' +[function.PyUnicode_Equal] + added = '3.14' diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c index 2b70d09108a333..c7a23d5d1cbd71 100644 --- a/Modules/_testlimitedcapi/unicode.c +++ b/Modules/_testlimitedcapi/unicode.c @@ -1,7 +1,7 @@ #include "pyconfig.h" // Py_GIL_DISABLED #ifndef Py_GIL_DISABLED - // Need limited C API 3.13 to test PyUnicode_EqualToUTF8() -# define Py_LIMITED_API 0x030d0000 + // Need limited C API 3.14 to test PyUnicode_Equal() +# define Py_LIMITED_API 0x030e0000 #endif #include "parts.h" @@ -1837,6 +1837,23 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored)) #undef CHECK_FORMAT_0 } + +/* Test PyUnicode_Equal() */ +static PyObject * +unicode_equal(PyObject *module, PyObject *args) +{ + PyObject *str1, *str2; + if (!PyArg_ParseTuple(args, "OO", &str1, &str2)) { + return NULL; + } + + NULLABLE(str1); + NULLABLE(str2); + RETURN_INT(PyUnicode_Equal(str1, str2)); +} + + + static PyMethodDef TestMethods[] = { {"codec_incrementalencoder", codec_incrementalencoder, METH_VARARGS}, {"codec_incrementaldecoder", codec_incrementaldecoder, METH_VARARGS}, @@ -1924,6 +1941,7 @@ static PyMethodDef TestMethods[] = { {"unicode_format", unicode_format, METH_VARARGS}, {"unicode_contains", unicode_contains, METH_VARARGS}, {"unicode_isidentifier", unicode_isidentifier, METH_O}, + {"unicode_equal", unicode_equal, METH_VARARGS}, {NULL}, }; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e9589cfe44f3bf..60d4875d3b393e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11001,6 +11001,24 @@ _PyUnicode_Equal(PyObject *str1, PyObject *str2) } +int +PyUnicode_Equal(PyObject *str1, PyObject *str2) +{ + if (!PyUnicode_Check(str1)) 
{ + PyErr_Format(PyExc_TypeError, + "first argument must be str, not %T", str1); + return -1; + } + if (!PyUnicode_Check(str2)) { + PyErr_Format(PyExc_TypeError, + "second argument must be str, not %T", str2); + return -1; + } + + return _PyUnicode_Equal(str1, str2); +} + + int PyUnicode_Compare(PyObject *left, PyObject *right) { diff --git a/PC/python3dll.c b/PC/python3dll.c index 6b8208ab90bd95..9296474617e115 100755 --- a/PC/python3dll.c +++ b/PC/python3dll.c @@ -717,6 +717,7 @@ EXPORT_FUNC(PyUnicode_DecodeUTF8Stateful) EXPORT_FUNC(PyUnicode_EncodeCodePage) EXPORT_FUNC(PyUnicode_EncodeFSDefault) EXPORT_FUNC(PyUnicode_EncodeLocale) +EXPORT_FUNC(PyUnicode_Equal) EXPORT_FUNC(PyUnicode_EqualToUTF8) EXPORT_FUNC(PyUnicode_EqualToUTF8AndSize) EXPORT_FUNC(PyUnicode_Find)