From f5c39b3e9cc88d1eaa9229d610b0221305a83ad9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Libor=20Mart=C3=ADnek?= Date: Sun, 4 Aug 2024 14:02:29 +0000 Subject: [PATCH 01/23] gh-122661: Remove GNU make-specific directive from Doc/Makefile (#122662) --- Doc/Makefile | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/Doc/Makefile b/Doc/Makefile index c70768754834dd..b2ee3fe7d28ed0 100644 --- a/Doc/Makefile +++ b/Doc/Makefile @@ -6,6 +6,7 @@ # You can set these variables from the command line. PYTHON = python3 VENVDIR = ./venv +UV = uv SPHINXBUILD = PATH=$(VENVDIR)/bin:$$PATH sphinx-build BLURB = PATH=$(VENVDIR)/bin:$$PATH blurb JOBS = auto @@ -150,14 +151,10 @@ gettext: build htmlview: html $(PYTHON) -c "import os, webbrowser; webbrowser.open('file://' + os.path.realpath('build/html/index.html'))" -.PHONY: ensure-sphinx-autobuild -ensure-sphinx-autobuild: venv - $(call ensure_package,sphinx-autobuild) - .PHONY: htmllive htmllive: SPHINXBUILD = $(VENVDIR)/bin/sphinx-autobuild htmllive: SPHINXOPTS = --re-ignore="/venv/" --open-browser --delay 0 -htmllive: ensure-sphinx-autobuild html +htmllive: _ensure-sphinx-autobuild html .PHONY: clean clean: clean-venv @@ -174,15 +171,15 @@ venv: echo "To recreate it, remove it first with \`make clean-venv'."; \ else \ echo "Creating venv in $(VENVDIR)"; \ - if uv --version > /dev/null; then \ - uv venv $(VENVDIR); \ - VIRTUAL_ENV=$(VENVDIR) uv pip install -r $(REQUIREMENTS); \ + if $(UV) --version >/dev/null 2>&1; then \ + $(UV) venv $(VENVDIR); \ + VIRTUAL_ENV=$(VENVDIR) $(UV) pip install -r $(REQUIREMENTS); \ else \ $(PYTHON) -m venv $(VENVDIR); \ $(VENVDIR)/bin/python3 -m pip install --upgrade pip; \ $(VENVDIR)/bin/python3 -m pip install -r $(REQUIREMENTS); \ - echo "The venv has been created in the $(VENVDIR) directory"; \ fi; \ + echo "The venv has been created in the $(VENVDIR) directory"; \ fi .PHONY: dist @@ -240,17 +237,24 @@ dist: rm -r dist/python-$(DISTVERSION)-docs-texinfo rm dist/python-$(DISTVERSION)-docs-texinfo.tar -define ensure_package - if uv --version > /dev/null; then \ - $(VENVDIR)/bin/python3 -m $(1) --version > /dev/null || VIRTUAL_ENV=$(VENVDIR) uv pip install $(1); \ +.PHONY: _ensure-package +_ensure-package: venv + if $(UV) --version >/dev/null 2>&1; then \ + VIRTUAL_ENV=$(VENVDIR) $(UV) pip install $(PACKAGE); \ else \ - $(VENVDIR)/bin/python3 -m $(1) --version > /dev/null || $(VENVDIR)/bin/python3 -m pip install $(1); \ + $(VENVDIR)/bin/python3 -m pip install $(PACKAGE); \ fi -endef + +.PHONY: _ensure-pre-commit +_ensure-pre-commit: + make _ensure-package PACKAGE=pre-commit + +.PHONY: _ensure-sphinx-autobuild +_ensure-sphinx-autobuild: + make _ensure-package PACKAGE=sphinx-autobuild .PHONY: check -check: venv - $(call ensure_package,pre_commit) +check: _ensure-pre-commit $(VENVDIR)/bin/python3 -m pre_commit run --all-files .PHONY: serve From 3bde3d8e03eb3d0632d0dced0ab710ab9e3b2894 Mon Sep 17 00:00:00 2001 From: Damien <81557462+Damien-Chen@users.noreply.github.com> Date: Mon, 5 Aug 2024 00:57:20 +0800 Subject: [PATCH 02/23] Add `3.13` and remove `3.7` in Azure Pipelines (#122670) --- .azure-pipelines/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure-pipelines/ci.yml b/.azure-pipelines/ci.yml index b5b2765e43844f..d3e842d9f31d01 100644 --- a/.azure-pipelines/ci.yml +++ b/.azure-pipelines/ci.yml @@ -1,4 +1,4 @@ -trigger: ['main', '3.12', '3.11', '3.10', '3.9', '3.8', '3.7'] +trigger: ['main', '3.13', '3.12', '3.11', '3.10', '3.9', '3.8'] jobs: - 
job: Prebuild From d0b92dd5ca46a10558857adeb7bb48ecf39fa783 Mon Sep 17 00:00:00 2001 From: Jonathan Protzenko Date: Sun, 4 Aug 2024 16:22:51 -0700 Subject: [PATCH 03/23] gh-122573: Require Python 3.10 or newer for Windows builds (GH-122574) Match statements in tooling require a more recent Python. Tools/cases_generator/*.py (and `Tools/jit/*.py` in 3.13+). Co-authored-by: Erlend E. Aasland Co-authored-by: Gregory P. Smith --- .../Windows/2024-08-01-10-55-15.gh-issue-122573.4-UCFY.rst | 1 + PCbuild/find_python.bat | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2024-08-01-10-55-15.gh-issue-122573.4-UCFY.rst diff --git a/Misc/NEWS.d/next/Windows/2024-08-01-10-55-15.gh-issue-122573.4-UCFY.rst b/Misc/NEWS.d/next/Windows/2024-08-01-10-55-15.gh-issue-122573.4-UCFY.rst new file mode 100644 index 00000000000000..5cc69e206debf5 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2024-08-01-10-55-15.gh-issue-122573.4-UCFY.rst @@ -0,0 +1 @@ +The Windows build of CPython now requires 3.10 or newer. diff --git a/PCbuild/find_python.bat b/PCbuild/find_python.bat index af85f6d362466e..6db579fa8de08a 100644 --- a/PCbuild/find_python.bat +++ b/PCbuild/find_python.bat @@ -39,15 +39,15 @@ @if "%_Py_EXTERNALS_DIR%"=="" (set _Py_EXTERNALS_DIR=%_Py_D%\..\externals) @rem If we have Python in externals, use that one -@if exist "%_Py_EXTERNALS_DIR%\pythonx86\tools\python.exe" ("%_Py_EXTERNALS_DIR%\pythonx86\tools\python.exe" -Ec "import sys; assert sys.version_info[:2] >= (3, 8)" >nul 2>nul) && (set PYTHON="%_Py_EXTERNALS_DIR%\pythonx86\tools\python.exe") && (set _Py_Python_Source=found in externals directory) && goto :found || rmdir /Q /S "%_Py_EXTERNALS_DIR%\pythonx86" +@if exist "%_Py_EXTERNALS_DIR%\pythonx86\tools\python.exe" ("%_Py_EXTERNALS_DIR%\pythonx86\tools\python.exe" -Ec "import sys; assert sys.version_info[:2] >= (3, 10)" >nul 2>nul) && (set PYTHON="%_Py_EXTERNALS_DIR%\pythonx86\tools\python.exe") && (set _Py_Python_Source=found in externals directory) && goto :found || rmdir /Q /S "%_Py_EXTERNALS_DIR%\pythonx86" @rem If HOST_PYTHON is recent enough, use that -@if NOT "%HOST_PYTHON%"=="" @%HOST_PYTHON% -Ec "import sys; assert sys.version_info[:2] >= (3, 9)" >nul 2>nul && (set PYTHON="%HOST_PYTHON%") && (set _Py_Python_Source=found as HOST_PYTHON) && goto :found +@if NOT "%HOST_PYTHON%"=="" @%HOST_PYTHON% -Ec "import sys; assert sys.version_info[:2] >= (3, 10)" >nul 2>nul && (set PYTHON="%HOST_PYTHON%") && (set _Py_Python_Source=found as HOST_PYTHON) && goto :found @rem If py.exe finds a recent enough version, use that one @rem It is fine to add new versions to this list when they have released, @rem but we do not use prerelease builds here. 
-@for %%p in (3.12 3.11 3.10 3.9) do @py -%%p -EV >nul 2>&1 && (set PYTHON=py -%%p) && (set _Py_Python_Source=found %%p with py.exe) && goto :found
+@for %%p in (3.12 3.11 3.10) do @py -%%p -EV >nul 2>&1 && (set PYTHON=py -%%p) && (set _Py_Python_Source=found %%p with py.exe) && goto :found

 @if NOT exist "%_Py_EXTERNALS_DIR%" mkdir "%_Py_EXTERNALS_DIR%"
 @set _Py_NUGET=%NUGET%

From 5207adf228547273b0e8d0253c23c69b95d7fe11 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 5 Aug 2024 09:03:14 +0000
Subject: [PATCH 04/23] build(deps-dev): bump mypy from 1.10.1 to 1.11.1 in
 /Tools (#122550)

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Alex Waygood
---
 Tools/clinic/libclinic/converter.py | 4 +++-
 Tools/requirements-dev.txt          | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/Tools/clinic/libclinic/converter.py b/Tools/clinic/libclinic/converter.py
index 86853bb4fba253..2abf06dc4e89a2 100644
--- a/Tools/clinic/libclinic/converter.py
+++ b/Tools/clinic/libclinic/converter.py
@@ -545,7 +545,9 @@ def closure(f: CConverterClassT) -> CConverterClassT:
         if not kwargs:
             added_f = f
         else:
-            added_f = functools.partial(f, **kwargs)
+            # type ignore due to a mypy regression :(
+            # https://github.com/python/mypy/issues/17646
+            added_f = functools.partial(f, **kwargs)  # type: ignore[misc]
         if format_unit:
             legacy_converters[format_unit] = added_f
         return f
diff --git a/Tools/requirements-dev.txt b/Tools/requirements-dev.txt
index de8496a17b85ef..cbf4072b500061 100644
--- a/Tools/requirements-dev.txt
+++ b/Tools/requirements-dev.txt
@@ -1,6 +1,6 @@
 # Requirements file for external linters and checks we run on
 # Tools/clinic, Tools/cases_generator/, and Tools/peg_generator/ in CI
-mypy==1.10.1
+mypy==1.11.1

 # needed for peg_generator:
 types-psutil==6.0.0.20240621

From 1422500d020bd199b26357fc387f8b79b82226cd Mon Sep 17 00:00:00 2001
From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com>
Date: Mon, 5 Aug 2024 10:17:55 +0100
Subject: [PATCH 05/23] gh-121367: [doc] BUILD_TUPLE arg can be 0 (#122663)

---
 Doc/library/dis.rst | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst
index 26b13c87181000..440ca233584e57 100644
--- a/Doc/library/dis.rst
+++ b/Doc/library/dis.rst
@@ -1081,11 +1081,15 @@ iterations of the loop.

 .. opcode:: BUILD_TUPLE (count)

    Creates a tuple consuming *count* items from the stack, and pushes the
-   resulting tuple onto the stack.::
-
-      assert count > 0
-      STACK, values = STACK[:-count], STACK[-count:]
-      STACK.append(tuple(values))
+   resulting tuple onto the stack::
+
+      if count == 0:
+          value = ()
+      else:
+          value = tuple(STACK[-count:])
+          STACK = STACK[:-count]
+
+      STACK.append(value)

 ..
opcode:: BUILD_LIST (count) From 1bb955a2fe0237721c141fdfe520fd3ba46db11e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 5 Aug 2024 16:21:32 +0300 Subject: [PATCH 06/23] gh-122459: Optimize pickling by name objects without __module__ (GH-122460) --- Lib/pickle.py | 97 ++++--- Lib/test/pickletester.py | 2 +- ...-07-30-15-57-07.gh-issue-122459.AYIoeN.rst | 2 + Modules/_pickle.c | 249 +++++++++--------- 4 files changed, 173 insertions(+), 177 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-07-30-15-57-07.gh-issue-122459.AYIoeN.rst diff --git a/Lib/pickle.py b/Lib/pickle.py index 299c9e0e5e5641..b8e114a79f2202 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -313,38 +313,45 @@ def load_frame(self, frame_size): # Tools used for pickling. -def _getattribute(obj, name): - top = obj - for subpath in name.split('.'): - if subpath == '': - raise AttributeError("Can't get local attribute {!r} on {!r}" - .format(name, top)) - try: - parent = obj - obj = getattr(obj, subpath) - except AttributeError: - raise AttributeError("Can't get attribute {!r} on {!r}" - .format(name, top)) from None - return obj, parent +def _getattribute(obj, dotted_path): + for subpath in dotted_path: + obj = getattr(obj, subpath) + return obj def whichmodule(obj, name): """Find the module an object belong to.""" + dotted_path = name.split('.') module_name = getattr(obj, '__module__', None) - if module_name is not None: - return module_name - # Protect the iteration by using a list copy of sys.modules against dynamic - # modules that trigger imports of other modules upon calls to getattr. - for module_name, module in sys.modules.copy().items(): - if (module_name == '__main__' - or module_name == '__mp_main__' # bpo-42406 - or module is None): - continue - try: - if _getattribute(module, name)[0] is obj: - return module_name - except AttributeError: - pass - return '__main__' + if module_name is None and '' not in dotted_path: + # Protect the iteration by using a list copy of sys.modules against dynamic + # modules that trigger imports of other modules upon calls to getattr. + for module_name, module in sys.modules.copy().items(): + if (module_name == '__main__' + or module_name == '__mp_main__' # bpo-42406 + or module is None): + continue + try: + if _getattribute(module, dotted_path) is obj: + return module_name + except AttributeError: + pass + module_name = '__main__' + elif module_name is None: + module_name = '__main__' + + try: + __import__(module_name, level=0) + module = sys.modules[module_name] + if _getattribute(module, dotted_path) is obj: + return module_name + except (ImportError, KeyError, AttributeError): + raise PicklingError( + "Can't pickle %r: it's not found as %s.%s" % + (obj, module_name, name)) from None + + raise PicklingError( + "Can't pickle %r: it's not the same object as %s.%s" % + (obj, module_name, name)) def encode_long(x): r"""Encode a long to a two's complement little-endian binary string. 
@@ -1074,24 +1081,10 @@ def save_global(self, obj, name=None):

         if name is None:
             name = getattr(obj, '__qualname__', None)
-        if name is None:
-            name = obj.__name__
+            if name is None:
+                name = obj.__name__

         module_name = whichmodule(obj, name)
-        try:
-            __import__(module_name, level=0)
-            module = sys.modules[module_name]
-            obj2, parent = _getattribute(module, name)
-        except (ImportError, KeyError, AttributeError):
-            raise PicklingError(
-                "Can't pickle %r: it's not found as %s.%s" %
-                (obj, module_name, name)) from None
-        else:
-            if obj2 is not obj:
-                raise PicklingError(
-                    "Can't pickle %r: it's not the same object as %s.%s" %
-                    (obj, module_name, name))
-
         if self.proto >= 2:
             code = _extension_registry.get((module_name, name))
             if code:
@@ -1103,10 +1096,7 @@ def save_global(self, obj, name=None):
                 else:
                     write(EXT4 + pack("<i", code))
                 return
-            lastname = name.rpartition('.')[2]
-            if parent is module:
-                name = lastname
-        # Non-ASCII identifiers are supported only with protocols >= 3.
+
         if self.proto >= 4:
             self.save(module_name)
             self.save(name)
@@ -1616,7 +1606,16 @@ def find_class(self, module, name):
                 module = _compat_pickle.IMPORT_MAPPING[module]
         __import__(module, level=0)
         if self.proto >= 4:
-            return _getattribute(sys.modules[module], name)[0]
+            module = sys.modules[module]
+            dotted_path = name.split('.')
+            if '' in dotted_path:
+                raise AttributeError(
+                    f"Can't get local attribute {name!r} on {module!r}")
+            try:
+                return _getattribute(module, dotted_path)
+            except AttributeError:
+                raise AttributeError(
+                    f"Can't get attribute {name!r} on {module!r}") from None
         else:
             return getattr(sys.modules[module], name)
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 3c936b3bc4029e..db42f13b0b98ab 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -2068,7 +2068,7 @@ def f():
             self.dumps(f, proto)
         self.assertIn(str(cm.exception), {
             f"Can't pickle {f!r}: it's not found as {__name__}.{f.__qualname__}",
-            f"Can't get local object {f.__qualname__!r}"})
+            f"Can't get local attribute {f.__qualname__!r} on {sys.modules[__name__]}"})
         # Same without a __module__ attribute (exercises a different path
         # in _pickle.c).
         del f.__module__
diff --git a/Misc/NEWS.d/next/Library/2024-07-30-15-57-07.gh-issue-122459.AYIoeN.rst b/Misc/NEWS.d/next/Library/2024-07-30-15-57-07.gh-issue-122459.AYIoeN.rst
new file mode 100644
index 00000000000000..595504048302da
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-07-30-15-57-07.gh-issue-122459.AYIoeN.rst
@@ -0,0 +1,2 @@
+Optimize :mod:`pickling <pickle>` by name objects without the ``__module__``
+attribute.
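For illustration (this sketch is not part of the patch, and the class name
Widget is made up): after the rewrite above, whichmodule() itself resolves the
module and verifies that the (module, qualname) pair leads back to the very
object being pickled, so save_global() no longer repeats that check. The
failure path can be seen like this:

    import pickle

    class Widget:
        pass

    # Point the qualname at a name that does not exist in the module, so the
    # (module_name, name) pair no longer resolves to the object.
    Widget.__qualname__ = "Nonexistent"

    try:
        pickle.dumps(Widget)
    except pickle.PicklingError as exc:
        # pickle.py words this as "it's not found as __main__.Nonexistent";
        # the C accelerator in _pickle.c (below) says "attribute lookup
        # Nonexistent on __main__ failed". Both raise PicklingError.
        print(exc)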
diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 50c73dca0db281..5d9ee8cb6c679d 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -1803,13 +1803,15 @@ memo_put(PickleState *st, PicklerObject *self, PyObject *obj) } static PyObject * -get_dotted_path(PyObject *obj, PyObject *name) +get_dotted_path(PyObject *name) +{ + return PyUnicode_Split(name, _Py_LATIN1_CHR('.'), -1); +} + +static int +check_dotted_path(PyObject *obj, PyObject *name, PyObject *dotted_path) { - PyObject *dotted_path; Py_ssize_t i, n; - dotted_path = PyUnicode_Split(name, _Py_LATIN1_CHR('.'), -1); - if (dotted_path == NULL) - return NULL; n = PyList_GET_SIZE(dotted_path); assert(n >= 1); for (i = 0; i < n; i++) { @@ -1821,61 +1823,33 @@ get_dotted_path(PyObject *obj, PyObject *name) else PyErr_Format(PyExc_AttributeError, "Can't get local attribute %R on %R", name, obj); - Py_DECREF(dotted_path); - return NULL; + return -1; } } - return dotted_path; + return 0; } static PyObject * -get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent) +getattribute(PyObject *obj, PyObject *names) { Py_ssize_t i, n; - PyObject *parent = NULL; assert(PyList_CheckExact(names)); Py_INCREF(obj); n = PyList_GET_SIZE(names); for (i = 0; i < n; i++) { PyObject *name = PyList_GET_ITEM(names, i); - Py_XSETREF(parent, obj); + PyObject *parent = obj; (void)PyObject_GetOptionalAttr(parent, name, &obj); + Py_DECREF(parent); if (obj == NULL) { - Py_DECREF(parent); return NULL; } } - if (pparent != NULL) - *pparent = parent; - else - Py_XDECREF(parent); return obj; } -static PyObject * -getattribute(PyObject *obj, PyObject *name, int allow_qualname) -{ - PyObject *dotted_path, *attr; - - if (allow_qualname) { - dotted_path = get_dotted_path(obj, name); - if (dotted_path == NULL) - return NULL; - attr = get_deep_attribute(obj, dotted_path, NULL); - Py_DECREF(dotted_path); - } - else { - (void)PyObject_GetOptionalAttr(obj, name, &attr); - } - if (attr == NULL && !PyErr_Occurred()) { - PyErr_Format(PyExc_AttributeError, - "Can't get attribute %R on %R", name, obj); - } - return attr; -} - static int _checkmodule(PyObject *module_name, PyObject *module, PyObject *global, PyObject *dotted_path) @@ -1888,7 +1862,7 @@ _checkmodule(PyObject *module_name, PyObject *module, return -1; } - PyObject *candidate = get_deep_attribute(module, dotted_path, NULL); + PyObject *candidate = getattribute(module, dotted_path); if (candidate == NULL) { return -1; } @@ -1901,7 +1875,7 @@ _checkmodule(PyObject *module_name, PyObject *module, } static PyObject * -whichmodule(PyObject *global, PyObject *dotted_path) +whichmodule(PickleState *st, PyObject *global, PyObject *global_name, PyObject *dotted_path) { PyObject *module_name; PyObject *module = NULL; @@ -1911,63 +1885,106 @@ whichmodule(PyObject *global, PyObject *dotted_path) if (PyObject_GetOptionalAttr(global, &_Py_ID(__module__), &module_name) < 0) { return NULL; } - if (module_name) { + if (module_name == NULL || module_name == Py_None) { /* In some rare cases (e.g., bound methods of extension types), __module__ can be None. If it is so, then search sys.modules for the module of global. 
*/ - if (module_name != Py_None) - return module_name; Py_CLEAR(module_name); - } - assert(module_name == NULL); - - /* Fallback on walking sys.modules */ - PyThreadState *tstate = _PyThreadState_GET(); - modules = _PySys_GetAttr(tstate, &_Py_ID(modules)); - if (modules == NULL) { - PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules"); - return NULL; - } - if (PyDict_CheckExact(modules)) { - i = 0; - while (PyDict_Next(modules, &i, &module_name, &module)) { - if (_checkmodule(module_name, module, global, dotted_path) == 0) { - return Py_NewRef(module_name); - } - if (PyErr_Occurred()) { - return NULL; - } + if (check_dotted_path(NULL, global_name, dotted_path) < 0) { + return NULL; } - } - else { - PyObject *iterator = PyObject_GetIter(modules); - if (iterator == NULL) { + PyThreadState *tstate = _PyThreadState_GET(); + modules = _PySys_GetAttr(tstate, &_Py_ID(modules)); + if (modules == NULL) { + PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules"); return NULL; } - while ((module_name = PyIter_Next(iterator))) { - module = PyObject_GetItem(modules, module_name); - if (module == NULL) { + if (PyDict_CheckExact(modules)) { + i = 0; + while (PyDict_Next(modules, &i, &module_name, &module)) { + Py_INCREF(module_name); + Py_INCREF(module); + if (_checkmodule(module_name, module, global, dotted_path) == 0) { + Py_DECREF(module); + return module_name; + } + Py_DECREF(module); Py_DECREF(module_name); - Py_DECREF(iterator); + if (PyErr_Occurred()) { + return NULL; + } + } + } + else { + PyObject *iterator = PyObject_GetIter(modules); + if (iterator == NULL) { return NULL; } - if (_checkmodule(module_name, module, global, dotted_path) == 0) { + while ((module_name = PyIter_Next(iterator))) { + module = PyObject_GetItem(modules, module_name); + if (module == NULL) { + Py_DECREF(module_name); + Py_DECREF(iterator); + return NULL; + } + if (_checkmodule(module_name, module, global, dotted_path) == 0) { + Py_DECREF(module); + Py_DECREF(iterator); + return module_name; + } Py_DECREF(module); - Py_DECREF(iterator); - return module_name; - } - Py_DECREF(module); - Py_DECREF(module_name); - if (PyErr_Occurred()) { - Py_DECREF(iterator); - return NULL; + Py_DECREF(module_name); + if (PyErr_Occurred()) { + Py_DECREF(iterator); + return NULL; + } } + Py_DECREF(iterator); + } + if (PyErr_Occurred()) { + return NULL; } - Py_DECREF(iterator); + + /* If no module is found, use __main__. */ + module_name = Py_NewRef(&_Py_ID(__main__)); } - /* If no module is found, use __main__. */ - return &_Py_ID(__main__); + /* XXX: Change to use the import C API directly with level=0 to disallow + relative imports. + + XXX: PyImport_ImportModuleLevel could be used. However, this bypasses + builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore + custom import functions (IMHO, this would be a nice security + feature). The import C API would need to be extended to support the + extra parameters of __import__ to fix that. 
*/ + module = PyImport_Import(module_name); + if (module == NULL) { + PyErr_Format(st->PicklingError, + "Can't pickle %R: import of module %R failed", + global, module_name); + return NULL; + } + if (check_dotted_path(module, global_name, dotted_path) < 0) { + Py_DECREF(module); + return NULL; + } + PyObject *actual = getattribute(module, dotted_path); + Py_DECREF(module); + if (actual == NULL) { + PyErr_Format(st->PicklingError, + "Can't pickle %R: attribute lookup %S on %S failed", + global, global_name, module_name); + return NULL; + } + if (actual != global) { + Py_DECREF(actual); + PyErr_Format(st->PicklingError, + "Can't pickle %R: it's not the same object as %S.%S", + global, module_name, global_name); + return NULL; + } + Py_DECREF(actual); + return module_name; } /* fast_save_enter() and fast_save_leave() are guards against recursive @@ -3590,10 +3607,7 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, { PyObject *global_name = NULL; PyObject *module_name = NULL; - PyObject *module = NULL; - PyObject *parent = NULL; PyObject *dotted_path = NULL; - PyObject *cls; int status = 0; const char global_op = GLOBAL; @@ -3611,44 +3625,13 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, } } - dotted_path = get_dotted_path(module, global_name); + dotted_path = get_dotted_path(global_name); if (dotted_path == NULL) goto error; - module_name = whichmodule(obj, dotted_path); + module_name = whichmodule(st, obj, global_name, dotted_path); if (module_name == NULL) goto error; - /* XXX: Change to use the import C API directly with level=0 to disallow - relative imports. - - XXX: PyImport_ImportModuleLevel could be used. However, this bypasses - builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore - custom import functions (IMHO, this would be a nice security - feature). The import C API would need to be extended to support the - extra parameters of __import__ to fix that. */ - module = PyImport_Import(module_name); - if (module == NULL) { - PyErr_Format(st->PicklingError, - "Can't pickle %R: import of module %R failed", - obj, module_name); - goto error; - } - cls = get_deep_attribute(module, dotted_path, &parent); - if (cls == NULL) { - PyErr_Format(st->PicklingError, - "Can't pickle %R: attribute lookup %S on %S failed", - obj, global_name, module_name); - goto error; - } - if (cls != obj) { - Py_DECREF(cls); - PyErr_Format(st->PicklingError, - "Can't pickle %R: it's not the same object as %S.%S", - obj, module_name, global_name); - goto error; - } - Py_DECREF(cls); - if (self->proto >= 2) { /* See whether this is in the extension registry, and if * so generate an EXT opcode. 
@@ -3720,12 +3703,6 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, } else { gen_global: - if (parent == module) { - Py_SETREF(global_name, - Py_NewRef(PyList_GET_ITEM(dotted_path, - PyList_GET_SIZE(dotted_path) - 1))); - Py_CLEAR(dotted_path); - } if (self->proto >= 4) { const char stack_global_op = STACK_GLOBAL; @@ -3845,8 +3822,6 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, } Py_XDECREF(module_name); Py_XDECREF(global_name); - Py_XDECREF(module); - Py_XDECREF(parent); Py_XDECREF(dotted_path); return status; @@ -7063,7 +7038,27 @@ _pickle_Unpickler_find_class_impl(UnpicklerObject *self, PyTypeObject *cls, if (module == NULL) { return NULL; } - global = getattribute(module, global_name, self->proto >= 4); + if (self->proto >= 4) { + PyObject *dotted_path = get_dotted_path(global_name); + if (dotted_path == NULL) { + Py_DECREF(module); + return NULL; + } + if (check_dotted_path(module, global_name, dotted_path) < 0) { + Py_DECREF(dotted_path); + Py_DECREF(module); + return NULL; + } + global = getattribute(module, dotted_path); + Py_DECREF(dotted_path); + if (global == NULL && !PyErr_Occurred()) { + PyErr_Format(PyExc_AttributeError, + "Can't get attribute %R on %R", global_name, module); + } + } + else { + global = PyObject_GetAttr(module, global_name); + } Py_DECREF(module); return global; } From 5bd72912a1a85be96092de302608a4298741c6cd Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 5 Aug 2024 16:27:48 +0100 Subject: [PATCH 07/23] GH-122616: Simplify LOAD_ATTR_WITH_HINT and STORE_ATTR_WITH_HINT (GH-122620) --- Python/bytecodes.c | 39 ++++++++---------------- Python/executor_cases.c.h | 61 +++++++++++++------------------------- Python/generated_cases.c.h | 39 ++++++++---------------- 3 files changed, 44 insertions(+), 95 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 48b74f93b92ce8..996f997d0ca8de 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2053,16 +2053,10 @@ dummy_func( PyDictObject *dict = _PyObject_GetManagedDict(owner_o); DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries); PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); - if (DK_IS_UNICODE(dict->ma_keys)) { - PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint; - DEOPT_IF(ep->me_key != name); - attr_o = ep->me_value; - } - else { - PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + hint; - DEOPT_IF(ep->me_key != name); - attr_o = ep->me_value; - } + DEOPT_IF(!DK_IS_UNICODE(dict->ma_keys)); + PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint; + DEOPT_IF(ep->me_key != name); + attr_o = ep->me_value; DEOPT_IF(attr_o == NULL); STAT_INC(LOAD_ATTR, hit); Py_INCREF(attr_o); @@ -2214,23 +2208,14 @@ dummy_func( DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries); PyObject *old_value; uint64_t new_version; - if (DK_IS_UNICODE(dict->ma_keys)) { - PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint; - DEOPT_IF(ep->me_key != name); - old_value = ep->me_value; - DEOPT_IF(old_value == NULL); - new_version = _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value)); - ep->me_value = PyStackRef_AsPyObjectSteal(value); - } - else { - PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + hint; - DEOPT_IF(ep->me_key != name); - old_value = ep->me_value; - DEOPT_IF(old_value == NULL); - new_version = _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value)); - ep->me_value = PyStackRef_AsPyObjectSteal(value); - } - 
Py_DECREF(old_value); + DEOPT_IF(!DK_IS_UNICODE(dict->ma_keys)); + PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint; + DEOPT_IF(ep->me_key != name); + old_value = ep->me_value; + PyDict_WatchEvent event = old_value == NULL ? PyDict_EVENT_ADDED : PyDict_EVENT_MODIFIED; + new_version = _PyDict_NotifyEvent(tstate->interp, event, dict, name, PyStackRef_AsPyObjectBorrow(value)); + ep->me_value = PyStackRef_AsPyObjectSteal(value); + Py_XDECREF(old_value); STAT_INC(STORE_ATTR, hit); /* Ensure dict is GC tracked if it needs to be */ if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 7f89196192504b..cbee77d5cf67fc 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2367,22 +2367,16 @@ JUMP_TO_JUMP_TARGET(); } PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); - if (DK_IS_UNICODE(dict->ma_keys)) { - PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint; - if (ep->me_key != name) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - attr_o = ep->me_value; + if (!DK_IS_UNICODE(dict->ma_keys)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); } - else { - PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + hint; - if (ep->me_key != name) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - attr_o = ep->me_value; + PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint; + if (ep->me_key != name) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); } + attr_o = ep->me_value; if (attr_o == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -2601,35 +2595,20 @@ } PyObject *old_value; uint64_t new_version; - if (DK_IS_UNICODE(dict->ma_keys)) { - PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint; - if (ep->me_key != name) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - old_value = ep->me_value; - if (old_value == NULL) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - new_version = _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value)); - ep->me_value = PyStackRef_AsPyObjectSteal(value); + if (!DK_IS_UNICODE(dict->ma_keys)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); } - else { - PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + hint; - if (ep->me_key != name) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - old_value = ep->me_value; - if (old_value == NULL) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - new_version = _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value)); - ep->me_value = PyStackRef_AsPyObjectSteal(value); + PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint; + if (ep->me_key != name) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); } - Py_DECREF(old_value); + old_value = ep->me_value; + PyDict_WatchEvent event = old_value == NULL ? 
PyDict_EVENT_ADDED : PyDict_EVENT_MODIFIED; + new_version = _PyDict_NotifyEvent(tstate->interp, event, dict, name, PyStackRef_AsPyObjectBorrow(value)); + ep->me_value = PyStackRef_AsPyObjectSteal(value); + Py_XDECREF(old_value); STAT_INC(STORE_ATTR, hit); /* Ensure dict is GC tracked if it needs to be */ if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index bed194e34d5376..879c40ab0cb6ba 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4907,16 +4907,10 @@ PyDictObject *dict = _PyObject_GetManagedDict(owner_o); DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, LOAD_ATTR); PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); - if (DK_IS_UNICODE(dict->ma_keys)) { - PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint; - DEOPT_IF(ep->me_key != name, LOAD_ATTR); - attr_o = ep->me_value; - } - else { - PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + hint; - DEOPT_IF(ep->me_key != name, LOAD_ATTR); - attr_o = ep->me_value; - } + DEOPT_IF(!DK_IS_UNICODE(dict->ma_keys), LOAD_ATTR); + PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint; + DEOPT_IF(ep->me_key != name, LOAD_ATTR); + attr_o = ep->me_value; DEOPT_IF(attr_o == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); Py_INCREF(attr_o); @@ -6411,23 +6405,14 @@ DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, STORE_ATTR); PyObject *old_value; uint64_t new_version; - if (DK_IS_UNICODE(dict->ma_keys)) { - PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint; - DEOPT_IF(ep->me_key != name, STORE_ATTR); - old_value = ep->me_value; - DEOPT_IF(old_value == NULL, STORE_ATTR); - new_version = _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value)); - ep->me_value = PyStackRef_AsPyObjectSteal(value); - } - else { - PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + hint; - DEOPT_IF(ep->me_key != name, STORE_ATTR); - old_value = ep->me_value; - DEOPT_IF(old_value == NULL, STORE_ATTR); - new_version = _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value)); - ep->me_value = PyStackRef_AsPyObjectSteal(value); - } - Py_DECREF(old_value); + DEOPT_IF(!DK_IS_UNICODE(dict->ma_keys), STORE_ATTR); + PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint; + DEOPT_IF(ep->me_key != name, STORE_ATTR); + old_value = ep->me_value; + PyDict_WatchEvent event = old_value == NULL ? 
PyDict_EVENT_ADDED : PyDict_EVENT_MODIFIED;
+            new_version = _PyDict_NotifyEvent(tstate->interp, event, dict, name, PyStackRef_AsPyObjectBorrow(value));
+            ep->me_value = PyStackRef_AsPyObjectSteal(value);
+            Py_XDECREF(old_value);
             STAT_INC(STORE_ATTR, hit);
             /* Ensure dict is GC tracked if it needs to be */
             if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) {

From 44659d392751f0161a0f958fec39ad013da45427 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miro=20Hron=C4=8Dok?=
Date: Tue, 6 Aug 2024 01:10:40 +0200
Subject: [PATCH 08/23] GH-118943: Handle races when moving jit_stencils.h
 (GH-120690)

Co-authored-by: Kirill Podoprigora
---
 .../Build/2024-06-18-15-28-25.gh-issue-118943.aie7nn.rst | 3 +++
 Tools/jit/_targets.py                                    | 7 ++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 Misc/NEWS.d/next/Build/2024-06-18-15-28-25.gh-issue-118943.aie7nn.rst

diff --git a/Misc/NEWS.d/next/Build/2024-06-18-15-28-25.gh-issue-118943.aie7nn.rst b/Misc/NEWS.d/next/Build/2024-06-18-15-28-25.gh-issue-118943.aie7nn.rst
new file mode 100644
index 00000000000000..997c990a96e476
--- /dev/null
+++ b/Misc/NEWS.d/next/Build/2024-06-18-15-28-25.gh-issue-118943.aie7nn.rst
@@ -0,0 +1,3 @@
+Fix a possible race condition affecting parallel builds configured with
+``--enable-experimental-jit``, in which :exc:`FileNotFoundError` could be caused by
+another process already moving ``jit_stencils.h.new`` to ``jit_stencils.h``.
diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py
index 5604c429bcf8ad..73d10a128756eb 100644
--- a/Tools/jit/_targets.py
+++ b/Tools/jit/_targets.py
@@ -221,7 +221,12 @@ def build(
                 file.write("\n")
             for line in _writer.dump(stencil_groups):
                 file.write(f"{line}\n")
-            jit_stencils_new.replace(jit_stencils)
+            try:
+                jit_stencils_new.replace(jit_stencils)
+            except FileNotFoundError:
+                # another process probably already moved the file
+                if not jit_stencils.is_file():
+                    raise
         finally:
             jit_stencils_new.unlink(missing_ok=True)

From 35ae4aab1aae93c1c11c45ac431787ff79ce7907 Mon Sep 17 00:00:00 2001
From: Sergey B Kirpichev
Date: Tue, 6 Aug 2024 06:56:50 +0300
Subject: [PATCH 09/23] gh-122686: Pin attrs package (for Hypothesis workflow)
 (#122687)

Co-authored-by: Victor Stinner
---
 Tools/requirements-hypothesis.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Tools/requirements-hypothesis.txt b/Tools/requirements-hypothesis.txt
index ab3f39ac6ee087..03f955ba8bf310 100644
--- a/Tools/requirements-hypothesis.txt
+++ b/Tools/requirements-hypothesis.txt
@@ -1,4 +1,7 @@
 # Requirements file for hypothesis that
 # we use to run our property-based tests in CI.

+# see https://github.com/python/cpython/issues/122686
+attrs<=23.2.0
+
 hypothesis==6.104.2

From b0c48b8fd88f26b31ec2f743358091073277dcde Mon Sep 17 00:00:00 2001
From: Malcolm Smith
Date: Tue, 6 Aug 2024 05:28:58 +0100
Subject: [PATCH 10/23] gh-116622: Android logging fixes (#122698)

Modifies the handling of stdout/stderr redirection on Android to
accommodate the rate and buffer size limits imposed by Android's logging
infrastructure.
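The limiter this patch adds is a classic token bucket. For reference (not
part of the patch itself), its core accounting reduces to the following
standalone sketch; RATE and BUCKET are illustrative stand-ins for the patch's
MAX_BYTES_PER_SECOND and BUCKET_SIZE constants:

    from threading import RLock
    from time import sleep, time

    RATE = 1024 * 1024     # assumed drain rate of `adb logcat`, bytes/second
    BUCKET = 128 * 1024    # burst allowance: half the logcat buffer size

    class TokenBucket:
        def __init__(self):
            self._lock = RLock()
            self._level = 0.0          # tokens (bytes) currently available
            self._prev = time()

        def pay(self, cost):
            with self._lock:
                now = time()
                # Refill with tokens earned since the last write, capped so
                # a long idle period cannot buy an unbounded burst.
                self._level = min(self._level + (now - self._prev) * RATE,
                                  BUCKET)
                self._prev = now
                self._level -= cost
                if self._level < 0:
                    # Overdrawn: block until the bucket is back at zero,
                    # capping long-run throughput at RATE bytes per second.
                    sleep(-self._level / RATE)

Each log write "pays" its message length (plus a fixed per-message overhead,
28 bytes in the patch) before the message is handed to android_log_write(),
which keeps bursts from overflowing logcat's own buffer.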
---
 Lib/_android_support.py                       | 128 +++++++++++++----
 Lib/test/test_android.py                      | 132 +++++++++++++++---
 ...-08-05-19-04-06.gh-issue-116622.3LWUzE.rst |   1 +
 3 files changed, 213 insertions(+), 48 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-08-05-19-04-06.gh-issue-116622.3LWUzE.rst

diff --git a/Lib/_android_support.py b/Lib/_android_support.py
index 590e85ea8c2db1..d5d13ec6a48e14 100644
--- a/Lib/_android_support.py
+++ b/Lib/_android_support.py
@@ -1,19 +1,20 @@
 import io
 import sys
-
+from threading import RLock
+from time import sleep, time

 # The maximum length of a log message in bytes, including the level marker and
-# tag, is defined as LOGGER_ENTRY_MAX_PAYLOAD in
-# platform/system/logging/liblog/include/log/log.h. As of API level 30, messages
-# longer than this will be be truncated by logcat. This limit has already been
-# reduced at least once in the history of Android (from 4076 to 4068 between API
-# level 23 and 26), so leave some headroom.
+# tag, is defined as LOGGER_ENTRY_MAX_PAYLOAD at
+# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log.h;l=71.
+# Messages longer than this will be truncated by logcat. This limit has already
+# been reduced at least once in the history of Android (from 4076 to 4068 between
+# API level 23 and 26), so leave some headroom.
 MAX_BYTES_PER_WRITE = 4000

 # UTF-8 uses a maximum of 4 bytes per character, so limiting text writes to this
-# size ensures that TextIOWrapper can always avoid exceeding MAX_BYTES_PER_WRITE.
+# size ensures that we can always avoid exceeding MAX_BYTES_PER_WRITE.
 # However, if the actual number of bytes per character is smaller than that,
-# then TextIOWrapper may still join multiple consecutive text writes into binary
+# then we may still join multiple consecutive text writes into binary
 # writes containing a larger number of characters.
 MAX_CHARS_PER_WRITE = MAX_BYTES_PER_WRITE // 4

@@ -26,18 +27,22 @@ def init_streams(android_log_write, stdout_prio, stderr_prio):
     if sys.executable:
         return  # Not embedded in an app.

+    global logcat
+    logcat = Logcat(android_log_write)
+
     sys.stdout = TextLogStream(
-        android_log_write, stdout_prio, "python.stdout", errors=sys.stdout.errors)
+        stdout_prio, "python.stdout", errors=sys.stdout.errors)
     sys.stderr = TextLogStream(
-        android_log_write, stderr_prio, "python.stderr", errors=sys.stderr.errors)
+        stderr_prio, "python.stderr", errors=sys.stderr.errors)


 class TextLogStream(io.TextIOWrapper):
-    def __init__(self, android_log_write, prio, tag, **kwargs):
+    def __init__(self, prio, tag, **kwargs):
         kwargs.setdefault("encoding", "UTF-8")
-        kwargs.setdefault("line_buffering", True)
-        super().__init__(BinaryLogStream(android_log_write, prio, tag), **kwargs)
-        self._CHUNK_SIZE = MAX_BYTES_PER_WRITE
+        super().__init__(BinaryLogStream(prio, tag), **kwargs)
+        self._lock = RLock()
+        self._pending_bytes = []
+        self._pending_bytes_count = 0

     def __repr__(self):
         return f"<TextLogStream {self.buffer.tag!r}>"
@@ -52,19 +57,48 @@ def write(self, s):
             s = str.__str__(s)

         # We want to emit one log message per line wherever possible, so split
-        # the string before sending it to the superclass. Note that
-        # "".splitlines() == [], so nothing will be logged for an empty string.
-        for line in s.splitlines(keepends=True):
-            while line:
-                super().write(line[:MAX_CHARS_PER_WRITE])
-                line = line[MAX_CHARS_PER_WRITE:]
+        # the string into lines first. Note that "".splitlines() == [], so
+        # nothing will be logged for an empty string.
+ with self._lock: + for line in s.splitlines(keepends=True): + while line: + chunk = line[:MAX_CHARS_PER_WRITE] + line = line[MAX_CHARS_PER_WRITE:] + self._write_chunk(chunk) return len(s) + # The size and behavior of TextIOWrapper's buffer is not part of its public + # API, so we handle buffering ourselves to avoid truncation. + def _write_chunk(self, s): + b = s.encode(self.encoding, self.errors) + if self._pending_bytes_count + len(b) > MAX_BYTES_PER_WRITE: + self.flush() + + self._pending_bytes.append(b) + self._pending_bytes_count += len(b) + if ( + self.write_through + or b.endswith(b"\n") + or self._pending_bytes_count > MAX_BYTES_PER_WRITE + ): + self.flush() + + def flush(self): + with self._lock: + self.buffer.write(b"".join(self._pending_bytes)) + self._pending_bytes.clear() + self._pending_bytes_count = 0 + + # Since this is a line-based logging system, line buffering cannot be turned + # off, i.e. a newline always causes a flush. + @property + def line_buffering(self): + return True + class BinaryLogStream(io.RawIOBase): - def __init__(self, android_log_write, prio, tag): - self.android_log_write = android_log_write + def __init__(self, prio, tag): self.prio = prio self.tag = tag @@ -85,10 +119,48 @@ def write(self, b): # Writing an empty string to the stream should have no effect. if b: - # Encode null bytes using "modified UTF-8" to avoid truncating the - # message. This should not affect the return value, as the caller - # may be expecting it to match the length of the input. - self.android_log_write(self.prio, self.tag, - b.replace(b"\x00", b"\xc0\x80")) - + logcat.write(self.prio, self.tag, b) return len(b) + + +# When a large volume of data is written to logcat at once, e.g. when a test +# module fails in --verbose3 mode, there's a risk of overflowing logcat's own +# buffer and losing messages. We avoid this by imposing a rate limit using the +# token bucket algorithm, based on a conservative estimate of how fast `adb +# logcat` can consume data. +MAX_BYTES_PER_SECOND = 1024 * 1024 + +# The logcat buffer size of a device can be determined by running `logcat -g`. +# We set the token bucket size to half of the buffer size of our current minimum +# API level, because other things on the system will be producing messages as +# well. +BUCKET_SIZE = 128 * 1024 + +# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log_read.h;l=39 +PER_MESSAGE_OVERHEAD = 28 + + +class Logcat: + def __init__(self, android_log_write): + self.android_log_write = android_log_write + self._lock = RLock() + self._bucket_level = 0 + self._prev_write_time = time() + + def write(self, prio, tag, message): + # Encode null bytes using "modified UTF-8" to avoid them truncating the + # message. 
+ message = message.replace(b"\x00", b"\xc0\x80") + + with self._lock: + now = time() + self._bucket_level += ( + (now - self._prev_write_time) * MAX_BYTES_PER_SECOND) + self._bucket_level = min(self._bucket_level, BUCKET_SIZE) + self._prev_write_time = now + + self._bucket_level -= PER_MESSAGE_OVERHEAD + len(tag) + len(message) + if self._bucket_level < 0: + sleep(-self._bucket_level / MAX_BYTES_PER_SECOND) + + self.android_log_write(prio, tag, message) diff --git a/Lib/test/test_android.py b/Lib/test/test_android.py index 115882a4c281f6..82035061bb6fdd 100644 --- a/Lib/test/test_android.py +++ b/Lib/test/test_android.py @@ -1,14 +1,17 @@ +import io import platform import queue import re import subprocess import sys import unittest +from _android_support import TextLogStream from array import array -from contextlib import contextmanager +from contextlib import ExitStack, contextmanager from threading import Thread from test.support import LOOPBACK_TIMEOUT -from time import time +from time import sleep, time +from unittest.mock import patch if sys.platform != "android": @@ -81,18 +84,39 @@ def unbuffered(self, stream): finally: stream.reconfigure(write_through=False) + # In --verbose3 mode, sys.stdout and sys.stderr are captured, so we can't + # test them directly. Detect this mode and use some temporary streams with + # the same properties. + def stream_context(self, stream_name, level): + # https://developer.android.com/ndk/reference/group/logging + prio = {"I": 4, "W": 5}[level] + + stack = ExitStack() + stack.enter_context(self.subTest(stream_name)) + stream = getattr(sys, stream_name) + if isinstance(stream, io.StringIO): + stack.enter_context( + patch( + f"sys.{stream_name}", + TextLogStream( + prio, f"python.{stream_name}", errors="backslashreplace" + ), + ) + ) + return stack + def test_str(self): for stream_name, level in [("stdout", "I"), ("stderr", "W")]: - with self.subTest(stream=stream_name): + with self.stream_context(stream_name, level): stream = getattr(sys, stream_name) tag = f"python.{stream_name}" self.assertEqual(f"", repr(stream)) - self.assertTrue(stream.writable()) - self.assertFalse(stream.readable()) + self.assertIs(stream.writable(), True) + self.assertIs(stream.readable(), False) self.assertEqual("UTF-8", stream.encoding) - self.assertTrue(stream.line_buffering) - self.assertFalse(stream.write_through) + self.assertIs(stream.line_buffering, True) + self.assertIs(stream.write_through, False) # stderr is backslashreplace by default; stdout is configured # that way by libregrtest.main. @@ -147,6 +171,13 @@ def write(s, lines=None, *, write_len=None): write("f\n\ng", ["exxf", ""]) write("\n", ["g"]) + # Since this is a line-based logging system, line buffering + # cannot be turned off, i.e. a newline always causes a flush. + stream.reconfigure(line_buffering=False) + self.assertIs(stream.line_buffering, True) + + # However, buffering can be turned off completely if you want a + # flush after every write. with self.unbuffered(stream): write("\nx", ["", "x"]) write("\na\n", ["", "a"]) @@ -209,30 +240,30 @@ def __str__(self): # (MAX_BYTES_PER_WRITE). 
# # ASCII (1 byte per character) - write(("foobar" * 700) + "\n", - [("foobar" * 666) + "foob", # 4000 bytes - "ar" + ("foobar" * 33)]) # 200 bytes + write(("foobar" * 700) + "\n", # 4200 bytes in + [("foobar" * 666) + "foob", # 4000 bytes out + "ar" + ("foobar" * 33)]) # 200 bytes out # "Full-width" digits 0-9 (3 bytes per character) s = "\uff10\uff11\uff12\uff13\uff14\uff15\uff16\uff17\uff18\uff19" - write((s * 150) + "\n", - [s * 100, # 3000 bytes - s * 50]) # 1500 bytes + write((s * 150) + "\n", # 4500 bytes in + [s * 100, # 3000 bytes out + s * 50]) # 1500 bytes out s = "0123456789" - write(s * 200, []) - write(s * 150, []) - write(s * 51, [s * 350]) # 3500 bytes - write("\n", [s * 51]) # 510 bytes + write(s * 200, []) # 2000 bytes in + write(s * 150, []) # 1500 bytes in + write(s * 51, [s * 350]) # 510 bytes in, 3500 bytes out + write("\n", [s * 51]) # 0 bytes in, 510 bytes out def test_bytes(self): for stream_name, level in [("stdout", "I"), ("stderr", "W")]: - with self.subTest(stream=stream_name): + with self.stream_context(stream_name, level): stream = getattr(sys, stream_name).buffer tag = f"python.{stream_name}" self.assertEqual(f"", repr(stream)) - self.assertTrue(stream.writable()) - self.assertFalse(stream.readable()) + self.assertIs(stream.writable(), True) + self.assertIs(stream.readable(), False) def write(b, lines=None, *, write_len=None): if write_len is None: @@ -330,3 +361,64 @@ def write(b, lines=None, *, write_len=None): fr"{type(obj).__name__}" ): stream.write(obj) + + def test_rate_limit(self): + # https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log_read.h;l=39 + PER_MESSAGE_OVERHEAD = 28 + + # https://developer.android.com/ndk/reference/group/logging + ANDROID_LOG_DEBUG = 3 + + # To avoid flooding the test script output, use a different tag rather + # than stdout or stderr. + tag = "python.rate_limit" + stream = TextLogStream(ANDROID_LOG_DEBUG, tag) + + # Make a test message which consumes 1 KB of the logcat buffer. + message = "Line {:03d} " + message += "." * ( + 1024 - PER_MESSAGE_OVERHEAD - len(tag) - len(message.format(0)) + ) + "\n" + + # See _android_support.py. The default values of these parameters work + # well across a wide range of devices, but we'll use smaller values to + # ensure a quick and reliable test that doesn't flood the log too much. + MAX_KB_PER_SECOND = 100 + BUCKET_KB = 10 + with ( + patch("_android_support.MAX_BYTES_PER_SECOND", MAX_KB_PER_SECOND * 1024), + patch("_android_support.BUCKET_SIZE", BUCKET_KB * 1024), + ): + # Make sure the token bucket is full. + sleep(BUCKET_KB / MAX_KB_PER_SECOND) + line_num = 0 + + # Write BUCKET_KB messages, and return the rate at which they were + # accepted in KB per second. + def write_bucketful(): + nonlocal line_num + start = time() + max_line_num = line_num + BUCKET_KB + while line_num < max_line_num: + stream.write(message.format(line_num)) + line_num += 1 + return BUCKET_KB / (time() - start) + + # The first bucketful should be written with minimal delay. The + # factor of 2 here is not arbitrary: it verifies that the system can + # write fast enough to empty the bucket within two bucketfuls, which + # the next part of the test depends on. + self.assertGreater(write_bucketful(), MAX_KB_PER_SECOND * 2) + + # Write another bucketful to empty the token bucket completely. + write_bucketful() + + # The next bucketful should be written at the rate limit. 
+ self.assertAlmostEqual( + write_bucketful(), MAX_KB_PER_SECOND, + delta=MAX_KB_PER_SECOND * 0.1 + ) + + # Once the token bucket refills, we should go back to full speed. + sleep(BUCKET_KB / MAX_KB_PER_SECOND) + self.assertGreater(write_bucketful(), MAX_KB_PER_SECOND * 2) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-08-05-19-04-06.gh-issue-116622.3LWUzE.rst b/Misc/NEWS.d/next/Core and Builtins/2024-08-05-19-04-06.gh-issue-116622.3LWUzE.rst new file mode 100644 index 00000000000000..9320928477af2c --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-08-05-19-04-06.gh-issue-116622.3LWUzE.rst @@ -0,0 +1 @@ +Fix Android stdout and stderr messages being truncated or lost. From 94a4bd79a7ab7b0ff5f216782d6fdaff6ed348fc Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Tue, 6 Aug 2024 08:57:36 +0300 Subject: [PATCH 11/23] gh-122704: Fix reference leak in Modules/_pickle.c (GH-122705) --- Modules/_pickle.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 5d9ee8cb6c679d..dc0ef0a184d205 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -1962,9 +1962,11 @@ whichmodule(PickleState *st, PyObject *global, PyObject *global_name, PyObject * PyErr_Format(st->PicklingError, "Can't pickle %R: import of module %R failed", global, module_name); + Py_DECREF(module_name); return NULL; } if (check_dotted_path(module, global_name, dotted_path) < 0) { + Py_DECREF(module_name); Py_DECREF(module); return NULL; } @@ -1974,6 +1976,7 @@ whichmodule(PickleState *st, PyObject *global, PyObject *global_name, PyObject * PyErr_Format(st->PicklingError, "Can't pickle %R: attribute lookup %S on %S failed", global, global_name, module_name); + Py_DECREF(module_name); return NULL; } if (actual != global) { @@ -1981,6 +1984,7 @@ whichmodule(PickleState *st, PyObject *global, PyObject *global_name, PyObject * PyErr_Format(st->PicklingError, "Can't pickle %R: it's not the same object as %S.%S", global, module_name, global_name); + Py_DECREF(module_name); return NULL; } Py_DECREF(actual); From e74680b7186e6823ea37cf7ab326d3d6bfa6f59a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 6 Aug 2024 08:59:44 +0300 Subject: [PATCH 12/23] gh-122595: Add more error checks in the compiler (GH-122596) --- Python/compile.c | 78 +++++++++++++++++++++--------- Python/symtable.c | 119 ++++++++++++++++++++++++++++++++++------------ 2 files changed, 145 insertions(+), 52 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index 87b2c2705474a4..9695a99d201144 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -505,21 +505,35 @@ dictbytype(PyObject *src, int scope_type, int flag, Py_ssize_t offset) deterministic, then the generated bytecode is not deterministic. 
*/ sorted_keys = PyDict_Keys(src); - if (sorted_keys == NULL) + if (sorted_keys == NULL) { + Py_DECREF(dest); return NULL; + } if (PyList_Sort(sorted_keys) != 0) { Py_DECREF(sorted_keys); + Py_DECREF(dest); return NULL; } num_keys = PyList_GET_SIZE(sorted_keys); for (key_i = 0; key_i < num_keys; key_i++) { - /* XXX this should probably be a macro in symtable.h */ - long vi; k = PyList_GET_ITEM(sorted_keys, key_i); v = PyDict_GetItemWithError(src, k); - assert(v && PyLong_Check(v)); - vi = PyLong_AS_LONG(v); + if (!v) { + if (!PyErr_Occurred()) { + PyErr_SetObject(PyExc_KeyError, k); + } + Py_DECREF(sorted_keys); + Py_DECREF(dest); + return NULL; + } + long vi = PyLong_AsLong(v); + if (vi == -1 && PyErr_Occurred()) { + Py_DECREF(sorted_keys); + Py_DECREF(dest); + return NULL; + } + /* XXX this should probably be a macro in symtable.h */ scope = (vi >> SCOPE_OFFSET) & SCOPE_MASK; if (scope == scope_type || vi & flag) { @@ -631,6 +645,7 @@ compiler_set_qualname(struct compiler *c) scope = _PyST_GetScope(parent->u_ste, mangled); Py_DECREF(mangled); + RETURN_IF_ERROR(scope); assert(scope != GLOBAL_IMPLICIT); if (scope == GLOBAL_EXPLICIT) force_global = 1; @@ -1648,7 +1663,7 @@ dict_lookup_arg(PyObject *dict, PyObject *name) if (v == NULL) { return ERROR; } - return PyLong_AS_LONG(v); + return PyLong_AsLong(v); } static int @@ -1671,7 +1686,7 @@ compiler_lookup_arg(struct compiler *c, PyCodeObject *co, PyObject *name) else { arg = dict_lookup_arg(c->u->u_metadata.u_freevars, name); } - if (arg == -1) { + if (arg == -1 && !PyErr_Occurred()) { PyObject *freevars = _PyCode_GetFreevars(co); if (freevars == NULL) { PyErr_Clear(); @@ -4085,6 +4100,8 @@ compiler_nameop(struct compiler *c, location loc, case GLOBAL_EXPLICIT: optype = OP_GLOBAL; break; + case -1: + goto error; default: /* scope can be 0 */ break; @@ -4638,6 +4655,7 @@ is_import_originated(struct compiler *c, expr_ty e) } long flags = _PyST_GetSymbol(SYMTABLE(c)->st_top, e->v.Name.id); + RETURN_IF_ERROR(flags); return flags & DEF_IMPORT; } @@ -4657,10 +4675,12 @@ can_optimize_super_call(struct compiler *c, expr_ty attr) PyObject *super_name = e->v.Call.func->v.Name.id; // detect statically-visible shadowing of 'super' name int scope = _PyST_GetScope(SYMTABLE_ENTRY(c), super_name); + RETURN_IF_ERROR(scope); if (scope != GLOBAL_IMPLICIT) { return 0; } scope = _PyST_GetScope(SYMTABLE(c)->st_top, super_name); + RETURN_IF_ERROR(scope); if (scope != 0) { return 0; } @@ -4767,7 +4787,9 @@ maybe_optimize_method_call(struct compiler *c, expr_ty e) } /* Check that the base object is not something that is imported */ - if (is_import_originated(c, meth->v.Attribute.value)) { + int ret = is_import_originated(c, meth->v.Attribute.value); + RETURN_IF_ERROR(ret); + if (ret) { return 0; } @@ -4795,7 +4817,9 @@ maybe_optimize_method_call(struct compiler *c, expr_ty e) /* Alright, we can optimize the code. */ location loc = LOC(meth); - if (can_optimize_super_call(c, meth)) { + ret = can_optimize_super_call(c, meth); + RETURN_IF_ERROR(ret); + if (ret) { RETURN_IF_ERROR(load_args_for_super(c, meth->v.Attribute.value)); int opcode = asdl_seq_LEN(meth->v.Attribute.value->v.Call.args) ? 
LOAD_SUPER_METHOD : LOAD_ZERO_SUPER_METHOD; @@ -5367,8 +5391,10 @@ push_inlined_comprehension_state(struct compiler *c, location loc, PyObject *k, *v; Py_ssize_t pos = 0; while (PyDict_Next(entry->ste_symbols, &pos, &k, &v)) { - assert(PyLong_Check(v)); - long symbol = PyLong_AS_LONG(v); + long symbol = PyLong_AsLong(v); + if (symbol == -1 && PyErr_Occurred()) { + return ERROR; + } long scope = (symbol >> SCOPE_OFFSET) & SCOPE_MASK; PyObject *outv = PyDict_GetItemWithError(SYMTABLE_ENTRY(c)->ste_symbols, k); if (outv == NULL) { @@ -5377,8 +5403,11 @@ push_inlined_comprehension_state(struct compiler *c, location loc, } outv = _PyLong_GetZero(); } - assert(PyLong_CheckExact(outv)); - long outsc = (PyLong_AS_LONG(outv) >> SCOPE_OFFSET) & SCOPE_MASK; + long outsymbol = PyLong_AsLong(outv); + if (outsymbol == -1 && PyErr_Occurred()) { + return ERROR; + } + long outsc = (outsymbol >> SCOPE_OFFSET) & SCOPE_MASK; // If a name has different scope inside than outside the comprehension, // we need to temporarily handle it with the right scope while // compiling the comprehension. If it's free in the comprehension @@ -6064,14 +6093,18 @@ compiler_visit_expr(struct compiler *c, expr_ty e) return compiler_formatted_value(c, e); /* The following exprs can be assignment targets. */ case Attribute_kind: - if (e->v.Attribute.ctx == Load && can_optimize_super_call(c, e)) { - RETURN_IF_ERROR(load_args_for_super(c, e->v.Attribute.value)); - int opcode = asdl_seq_LEN(e->v.Attribute.value->v.Call.args) ? - LOAD_SUPER_ATTR : LOAD_ZERO_SUPER_ATTR; - ADDOP_NAME(c, loc, opcode, e->v.Attribute.attr, names); - loc = update_start_location_to_match_attr(c, loc, e); - ADDOP(c, loc, NOP); - return SUCCESS; + if (e->v.Attribute.ctx == Load) { + int ret = can_optimize_super_call(c, e); + RETURN_IF_ERROR(ret); + if (ret) { + RETURN_IF_ERROR(load_args_for_super(c, e->v.Attribute.value)); + int opcode = asdl_seq_LEN(e->v.Attribute.value->v.Call.args) ? + LOAD_SUPER_ATTR : LOAD_ZERO_SUPER_ATTR; + ADDOP_NAME(c, loc, opcode, e->v.Attribute.attr, names); + loc = update_start_location_to_match_attr(c, loc, e); + ADDOP(c, loc, NOP); + return SUCCESS; + } } RETURN_IF_ERROR(compiler_maybe_add_static_attribute_to_class(c, e)); VISIT(c, expr, e->v.Attribute.value); @@ -7300,7 +7333,8 @@ consts_dict_keys_inorder(PyObject *dict) if (consts == NULL) return NULL; while (PyDict_Next(dict, &pos, &k, &v)) { - i = PyLong_AS_LONG(v); + assert(PyLong_CheckExact(v)); + i = PyLong_AsLong(v); /* The keys of the dictionary can be tuples wrapping a constant. * (see dict_add_o and _PyCode_ConstantKey). In that case * the object we want is always second. 
*/ diff --git a/Python/symtable.c b/Python/symtable.c index ef81a0799de3aa..4acf762f8fca39 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -526,17 +526,31 @@ _PySymtable_LookupOptional(struct symtable *st, void *key, long _PyST_GetSymbol(PySTEntryObject *ste, PyObject *name) { - PyObject *v = PyDict_GetItemWithError(ste->ste_symbols, name); - if (!v) + PyObject *v; + if (PyDict_GetItemRef(ste->ste_symbols, name, &v) < 0) { + return -1; + } + if (!v) { return 0; - assert(PyLong_Check(v)); - return PyLong_AS_LONG(v); + } + long symbol = PyLong_AsLong(v); + Py_DECREF(v); + if (symbol < 0) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_SystemError, "invalid symbol"); + } + return -1; + } + return symbol; } int _PyST_GetScope(PySTEntryObject *ste, PyObject *name) { long symbol = _PyST_GetSymbol(ste, name); + if (symbol < 0) { + return -1; + } return (symbol >> SCOPE_OFFSET) & SCOPE_MASK; } @@ -715,11 +729,14 @@ analyze_name(PySTEntryObject *ste, PyObject *scopes, PyObject *name, long flags, // global statement), we want to also treat it as a global in this scope. if (class_entry != NULL) { long class_flags = _PyST_GetSymbol(class_entry, name); + if (class_flags < 0) { + return 0; + } if (class_flags & DEF_GLOBAL) { SET_SCOPE(scopes, name, GLOBAL_EXPLICIT); return 1; } - else if (class_flags & DEF_BOUND && !(class_flags & DEF_NONLOCAL)) { + else if ((class_flags & DEF_BOUND) && !(class_flags & DEF_NONLOCAL)) { SET_SCOPE(scopes, name, GLOBAL_IMPLICIT); return 1; } @@ -763,6 +780,9 @@ is_free_in_any_child(PySTEntryObject *entry, PyObject *key) PySTEntryObject *child_ste = (PySTEntryObject *)PyList_GET_ITEM( entry->ste_children, i); long scope = _PyST_GetScope(child_ste, key); + if (scope < 0) { + return -1; + } if (scope == FREE) { return 1; } @@ -781,7 +801,10 @@ inline_comprehension(PySTEntryObject *ste, PySTEntryObject *comp, while (PyDict_Next(comp->ste_symbols, &pos, &k, &v)) { // skip comprehension parameter - long comp_flags = PyLong_AS_LONG(v); + long comp_flags = PyLong_AsLong(v); + if (comp_flags == -1 && PyErr_Occurred()) { + return 0; + } if (comp_flags & DEF_PARAM) { assert(_PyUnicode_EqualToASCIIString(k, ".0")); continue; @@ -822,11 +845,19 @@ inline_comprehension(PySTEntryObject *ste, PySTEntryObject *comp, SET_SCOPE(scopes, k, scope); } else { - if (PyLong_AsLong(existing) & DEF_BOUND) { + long flags = PyLong_AsLong(existing); + if (flags == -1 && PyErr_Occurred()) { + return 0; + } + if ((flags & DEF_BOUND) && ste->ste_type != ClassBlock) { // free vars in comprehension that are locals in outer scope can // now simply be locals, unless they are free in comp children, // or if the outer scope is a class block - if (!is_free_in_any_child(comp, k) && ste->ste_type != ClassBlock) { + int ok = is_free_in_any_child(comp, k); + if (ok < 0) { + return 0; + } + if (!ok) { if (PySet_Discard(comp_free, k) < 0) { return 0; } @@ -861,9 +892,10 @@ analyze_cells(PyObject *scopes, PyObject *free, PyObject *inlined_cells) if (!v_cell) return 0; while (PyDict_Next(scopes, &pos, &name, &v)) { - long scope; - assert(PyLong_Check(v)); - scope = PyLong_AS_LONG(v); + long scope = PyLong_AsLong(v); + if (scope == -1 && PyErr_Occurred()) { + goto error; + } if (scope != LOCAL) continue; int contains = PySet_Contains(free, name); @@ -926,9 +958,10 @@ update_symbols(PyObject *symbols, PyObject *scopes, /* Update scope information for all symbols in this scope */ while (PyDict_Next(symbols, &pos, &name, &v)) { - long scope, flags; - assert(PyLong_Check(v)); - flags = PyLong_AS_LONG(v); + 
long flags = PyLong_AsLong(v); + if (flags == -1 && PyErr_Occurred()) { + return 0; + } int contains = PySet_Contains(inlined_cells, name); if (contains < 0) { return 0; @@ -936,9 +969,18 @@ update_symbols(PyObject *symbols, PyObject *scopes, if (contains) { flags |= DEF_COMP_CELL; } - v_scope = PyDict_GetItemWithError(scopes, name); - assert(v_scope && PyLong_Check(v_scope)); - scope = PyLong_AS_LONG(v_scope); + if (PyDict_GetItemRef(scopes, name, &v_scope) < 0) { + return 0; + } + if (!v_scope) { + PyErr_SetObject(PyExc_KeyError, name); + return 0; + } + long scope = PyLong_AsLong(v_scope); + Py_DECREF(v_scope); + if (scope == -1 && PyErr_Occurred()) { + return 0; + } flags |= (scope << SCOPE_OFFSET); v_new = PyLong_FromLong(flags); if (!v_new) @@ -971,7 +1013,11 @@ update_symbols(PyObject *symbols, PyObject *scopes, or global in the class scope. */ if (classflag) { - long flags = PyLong_AS_LONG(v) | DEF_FREE_CLASS; + long flags = PyLong_AsLong(v); + if (flags == -1 && PyErr_Occurred()) { + goto error; + } + flags |= DEF_FREE_CLASS; v_new = PyLong_FromLong(flags); if (!v_new) { goto error; @@ -1110,7 +1156,10 @@ analyze_block(PySTEntryObject *ste, PyObject *bound, PyObject *free, } while (PyDict_Next(ste->ste_symbols, &pos, &name, &v)) { - long flags = PyLong_AS_LONG(v); + long flags = PyLong_AsLong(v); + if (flags == -1 && PyErr_Occurred()) { + goto error; + } if (!analyze_name(ste, scopes, name, flags, bound, local, free, global, type_params, class_entry)) goto error; @@ -1395,9 +1444,12 @@ symtable_lookup_entry(struct symtable *st, PySTEntryObject *ste, PyObject *name) { PyObject *mangled = _Py_MaybeMangle(st->st_private, ste, name); if (!mangled) - return 0; + return -1; long ret = _PyST_GetSymbol(ste, mangled); Py_DECREF(mangled); + if (ret < 0) { + return -1; + } return ret; } @@ -1420,7 +1472,10 @@ symtable_add_def_helper(struct symtable *st, PyObject *name, int flag, struct _s return 0; dict = ste->ste_symbols; if ((o = PyDict_GetItemWithError(dict, mangled))) { - val = PyLong_AS_LONG(o); + val = PyLong_AsLong(o); + if (val == -1 && PyErr_Occurred()) { + goto error; + } if ((flag & DEF_PARAM) && (val & DEF_PARAM)) { /* Is it better to use 'mangled' or 'name' here? 
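
The rewritten _PyST_GetSymbol above is built on PyDict_GetItemRef(), which, unlike PyDict_GetItemWithError(), distinguishes its three outcomes by return value and hands back a strong reference. A sketch of that calling convention, assuming a 3.13+ Python.h; lookup_long() is a made-up helper for illustration:

    #include <Python.h>

    /* PyDict_GetItemRef() returns -1 on error, 0 when the key is
     * missing, and 1 on success, storing a *strong* reference. */
    static long
    lookup_long(PyObject *dict, PyObject *key, long missing)
    {
        PyObject *v;
        int rc = PyDict_GetItemRef(dict, key, &v);
        if (rc < 0) {
            return -1;          /* exception already set */
        }
        if (rc == 0) {
            return missing;     /* key absent, no exception */
        }
        long result = PyLong_AsLong(v);
        Py_DECREF(v);           /* drop the strong reference */
        if (result == -1 && PyErr_Occurred()) {
            return -1;
        }
        return result;
    }
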
*/ PyErr_Format(PyExc_SyntaxError, DUPLICATE_ARGUMENT, name); @@ -1466,16 +1521,20 @@ symtable_add_def_helper(struct symtable *st, PyObject *name, int flag, struct _s if (flag & DEF_PARAM) { if (PyList_Append(ste->ste_varnames, mangled) < 0) goto error; - } else if (flag & DEF_GLOBAL) { + } else if (flag & DEF_GLOBAL) { /* XXX need to update DEF_GLOBAL for other flags too; perhaps only DEF_FREE_GLOBAL */ - val = flag; + val = 0; if ((o = PyDict_GetItemWithError(st->st_global, mangled))) { - val |= PyLong_AS_LONG(o); + val = PyLong_AsLong(o); + if (val == -1 && PyErr_Occurred()) { + goto error; + } } else if (PyErr_Occurred()) { goto error; } + val |= flag; o = PyLong_FromLong(val); if (o == NULL) goto error; @@ -2176,6 +2235,9 @@ symtable_extend_namedexpr_scope(struct symtable *st, expr_ty e) */ if (ste->ste_comprehension) { long target_in_scope = symtable_lookup_entry(st, ste, target_name); + if (target_in_scope < 0) { + return 0; + } if ((target_in_scope & DEF_COMP_ITER) && (target_in_scope & DEF_LOCAL)) { PyErr_Format(PyExc_SyntaxError, NAMED_EXPR_COMP_CONFLICT, target_name); @@ -2188,6 +2250,9 @@ symtable_extend_namedexpr_scope(struct symtable *st, expr_ty e) /* If we find a FunctionBlock entry, add as GLOBAL/LOCAL or NONLOCAL/LOCAL */ if (ste->ste_type == FunctionBlock) { long target_in_scope = symtable_lookup_entry(st, ste, target_name); + if (target_in_scope < 0) { + return 0; + } if (target_in_scope & DEF_GLOBAL) { if (!symtable_add_def(st, target_name, DEF_GLOBAL, LOCATION(e))) return 0; @@ -2601,9 +2666,6 @@ symtable_visit_params(struct symtable *st, asdl_arg_seq *args) { Py_ssize_t i; - if (!args) - return -1; - for (i = 0; i < asdl_seq_LEN(args); i++) { arg_ty arg = (arg_ty)asdl_seq_GET(args, i); if (!symtable_add_def(st, arg->arg, DEF_PARAM, LOCATION(arg))) @@ -2650,9 +2712,6 @@ symtable_visit_argannotations(struct symtable *st, asdl_arg_seq *args) { Py_ssize_t i; - if (!args) - return -1; - for (i = 0; i < asdl_seq_LEN(args); i++) { arg_ty arg = (arg_ty)asdl_seq_GET(args, i); if (arg->annotation) { From b72c748d7fb4ecc0bc4626c7bc05fbc6c83f0ba8 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Mon, 5 Aug 2024 23:16:29 -0700 Subject: [PATCH 13/23] Fix syntax in generate_re_casefix.py (#122699) This was broken in gh-97963. --- Lib/re/_casefix.py | 2 +- Tools/build/generate_re_casefix.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/re/_casefix.py b/Lib/re/_casefix.py index 06507d08bee02b..fed2d84fc01473 100644 --- a/Lib/re/_casefix.py +++ b/Lib/re/_casefix.py @@ -1,4 +1,4 @@ -# Auto-generated by Tools/scripts/generate_re_casefix.py. +# Auto-generated by Tools/build/generate_re_casefix.py. # Maps the code of lowercased character to codes of different lowercased # characters which have the same uppercase. diff --git a/Tools/build/generate_re_casefix.py b/Tools/build/generate_re_casefix.py index b57ac07426c27c..6cebfbd025c58c 100755 --- a/Tools/build/generate_re_casefix.py +++ b/Tools/build/generate_re_casefix.py @@ -23,9 +23,9 @@ def update_file(file, content): # Maps the code of lowercased character to codes of different lowercased # characters which have the same uppercase. 
-_EXTRA_CASES = { +_EXTRA_CASES = {{ %s -} +}} """ def uname(i): From a8be8fc6c4682089be45a87bd5ee1f686040116c Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 6 Aug 2024 08:40:39 +0100 Subject: [PATCH 14/23] GH-120024: Refactor code a bit so that escaping calls can be wrapped in spill code in code generator (GH-122693) --- Python/bytecodes.c | 50 ++++++++++++++++++++++++-------------- Python/executor_cases.c.h | 24 ++++++++++++------ Python/generated_cases.c.h | 46 +++++++++++++++++++++++------------ 3 files changed, 78 insertions(+), 42 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 996f997d0ca8de..a8527fe84b76f5 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -287,7 +287,8 @@ dummy_func( /* Need to create a fake StopIteration error here, * to conform to PEP 380 */ if (PyStackRef_GenCheck(receiver)) { - if (monitor_stop_iteration(tstate, frame, this_instr, PyStackRef_AsPyObjectBorrow(value))) { + int err = monitor_stop_iteration(tstate, frame, this_instr, PyStackRef_AsPyObjectBorrow(value)); + if (err) { ERROR_NO_POP(); } } @@ -302,7 +303,8 @@ dummy_func( tier1 inst(INSTRUMENTED_END_SEND, (receiver, value -- value)) { PyObject *receiver_o = PyStackRef_AsPyObjectBorrow(receiver); if (PyGen_Check(receiver_o) || PyCoro_CheckExact(receiver_o)) { - if (monitor_stop_iteration(tstate, frame, this_instr, PyStackRef_AsPyObjectBorrow(value))) { + int err = monitor_stop_iteration(tstate, frame, this_instr, PyStackRef_AsPyObjectBorrow(value)); + if (err) { ERROR_NO_POP(); } } @@ -1069,11 +1071,12 @@ dummy_func( PyStackRef_AsPyObjectBorrow(v)); } if (retval_o == NULL) { - if (_PyErr_ExceptionMatches(tstate, PyExc_StopIteration) - ) { + int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); + if (matches) { _PyEval_MonitorRaise(tstate, frame, this_instr); } - if (_PyGen_FetchStopIterationValue(&retval_o) == 0) { + int err = _PyGen_FetchStopIterationValue(&retval_o); + if (err == 0) { assert(retval_o != NULL); JUMPBY(oparg); } @@ -1210,7 +1213,8 @@ dummy_func( assert(throwflag); assert(exc_value && PyExceptionInstance_Check(exc_value)); - if (PyErr_GivenExceptionMatches(exc_value, PyExc_StopIteration)) { + int matches = PyErr_GivenExceptionMatches(exc_value, PyExc_StopIteration); + if (matches) { value = PyStackRef_FromPyObjectNew(((PyStopIterationObject *)exc_value)->value); DECREF_INPUTS(); none = PyStackRef_None; @@ -1425,7 +1429,8 @@ dummy_func( inst(LOAD_FROM_DICT_OR_GLOBALS, (mod_or_class_dict -- v)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); PyObject *v_o; - if (PyMapping_GetOptionalItem(PyStackRef_AsPyObjectBorrow(mod_or_class_dict), name, &v_o) < 0) { + int err = PyMapping_GetOptionalItem(PyStackRef_AsPyObjectBorrow(mod_or_class_dict), name, &v_o); + if (err < 0) { ERROR_NO_POP(); } if (v_o == NULL) { @@ -1596,7 +1601,8 @@ dummy_func( assert(class_dict); assert(oparg >= 0 && oparg < _PyFrame_GetCode(frame)->co_nlocalsplus); name = PyTuple_GET_ITEM(_PyFrame_GetCode(frame)->co_localsplusnames, oparg); - if (PyMapping_GetOptionalItem(class_dict, name, &value_o) < 0) { + int err = PyMapping_GetOptionalItem(class_dict, name, &value_o); + if (err < 0) { ERROR_NO_POP(); } if (!value_o) { @@ -1676,7 +1682,8 @@ dummy_func( PyObject *none_val = _PyList_Extend((PyListObject *)list, iterable); if (none_val == NULL) { - if (_PyErr_ExceptionMatches(tstate, PyExc_TypeError) && + int matches = _PyErr_ExceptionMatches(tstate, PyExc_TypeError); + if (matches && (Py_TYPE(iterable)->tp_iter == NULL && !PySequence_Check(iterable))) { 
_PyErr_Clear(tstate); @@ -1762,8 +1769,10 @@ dummy_func( PyObject *dict_o = PyStackRef_AsPyObjectBorrow(dict); PyObject *update_o = PyStackRef_AsPyObjectBorrow(update); - if (PyDict_Update(dict_o, update_o) < 0) { - if (_PyErr_ExceptionMatches(tstate, PyExc_AttributeError)) { + int err = PyDict_Update(dict_o, update_o); + if (err < 0) { + int matches = _PyErr_ExceptionMatches(tstate, PyExc_AttributeError); + if (matches) { _PyErr_Format(tstate, PyExc_TypeError, "'%.200s' object is not a mapping", Py_TYPE(update_o)->tp_name); @@ -1779,7 +1788,8 @@ dummy_func( PyObject *dict_o = PyStackRef_AsPyObjectBorrow(dict); PyObject *update_o = PyStackRef_AsPyObjectBorrow(update); - if (_PyDict_MergeEx(dict_o, update_o, 2) < 0) { + int err = _PyDict_MergeEx(dict_o, update_o, 2); + if (err < 0) { _PyEval_FormatKwargsError(tstate, callable_o, update_o); DECREF_INPUTS(); ERROR_IF(true, error); @@ -1943,7 +1953,8 @@ dummy_func( if (oparg & 1) { /* Designed to work in tandem with CALL, pushes two values. */ attr_o = NULL; - if (_PyObject_GetMethod(PyStackRef_AsPyObjectBorrow(owner), name, &attr_o)) { + int is_meth = _PyObject_GetMethod(PyStackRef_AsPyObjectBorrow(owner), name, &attr_o); + if (is_meth) { /* We can bypass temporary bound method object. meth is unbound method and obj is self. meth | self | arg1 | ... | argN @@ -2416,8 +2427,8 @@ dummy_func( inst(CHECK_EG_MATCH, (exc_value_st, match_type_st -- rest, match)) { PyObject *exc_value = PyStackRef_AsPyObjectBorrow(exc_value_st); PyObject *match_type = PyStackRef_AsPyObjectBorrow(match_type_st); - - if (_PyEval_CheckExceptStarTypeValid(tstate, match_type) < 0) { + int err = _PyEval_CheckExceptStarTypeValid(tstate, match_type); + if (err < 0) { DECREF_INPUTS(); ERROR_IF(true, error); } @@ -2704,7 +2715,8 @@ dummy_func( if (next_o == NULL) { next = PyStackRef_NULL; if (_PyErr_Occurred(tstate)) { - if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); + if (!matches) { ERROR_NO_POP(); } _PyEval_MonitorRaise(tstate, frame, this_instr); @@ -2729,7 +2741,8 @@ dummy_func( PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); if (next_o == NULL) { if (_PyErr_Occurred(tstate)) { - if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); + if (!matches) { ERROR_NO_POP(); } _PyEval_MonitorRaise(tstate, frame, frame->instr_ptr); @@ -2756,7 +2769,8 @@ dummy_func( } else { if (_PyErr_Occurred(tstate)) { - if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); + if (!matches) { ERROR_NO_POP(); } _PyEval_MonitorRaise(tstate, frame, this_instr); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index cbee77d5cf67fc..7f520eb7abbe80 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1777,7 +1777,8 @@ assert(class_dict); assert(oparg >= 0 && oparg < _PyFrame_GetCode(frame)->co_nlocalsplus); name = PyTuple_GET_ITEM(_PyFrame_GetCode(frame)->co_localsplusnames, oparg); - if (PyMapping_GetOptionalItem(class_dict, name, &value_o) < 0) { + int err = PyMapping_GetOptionalItem(class_dict, name, &value_o); + if (err < 0) { JUMP_TO_ERROR(); } if (!value_o) { @@ -1907,7 +1908,8 @@ PyObject *iterable = PyStackRef_AsPyObjectBorrow(iterable_st); PyObject *none_val = _PyList_Extend((PyListObject *)list, iterable); if (none_val == NULL) { - if (_PyErr_ExceptionMatches(tstate, PyExc_TypeError) && + int matches = 
_PyErr_ExceptionMatches(tstate, PyExc_TypeError); + if (matches && (Py_TYPE(iterable)->tp_iter == NULL && !PySequence_Check(iterable))) { _PyErr_Clear(tstate); @@ -2031,8 +2033,10 @@ dict = stack_pointer[-2 - (oparg - 1)]; PyObject *dict_o = PyStackRef_AsPyObjectBorrow(dict); PyObject *update_o = PyStackRef_AsPyObjectBorrow(update); - if (PyDict_Update(dict_o, update_o) < 0) { - if (_PyErr_ExceptionMatches(tstate, PyExc_AttributeError)) { + int err = PyDict_Update(dict_o, update_o); + if (err < 0) { + int matches = _PyErr_ExceptionMatches(tstate, PyExc_AttributeError); + if (matches) { _PyErr_Format(tstate, PyExc_TypeError, "'%.200s' object is not a mapping", Py_TYPE(update_o)->tp_name); @@ -2057,7 +2061,8 @@ PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); PyObject *dict_o = PyStackRef_AsPyObjectBorrow(dict); PyObject *update_o = PyStackRef_AsPyObjectBorrow(update); - if (_PyDict_MergeEx(dict_o, update_o, 2) < 0) { + int err = _PyDict_MergeEx(dict_o, update_o, 2); + if (err < 0) { _PyEval_FormatKwargsError(tstate, callable_o, update_o); PyStackRef_CLOSE(update); if (true) JUMP_TO_ERROR(); @@ -2182,7 +2187,8 @@ if (oparg & 1) { /* Designed to work in tandem with CALL, pushes two values. */ attr_o = NULL; - if (_PyObject_GetMethod(PyStackRef_AsPyObjectBorrow(owner), name, &attr_o)) { + int is_meth = _PyObject_GetMethod(PyStackRef_AsPyObjectBorrow(owner), name, &attr_o); + if (is_meth) { /* We can bypass temporary bound method object. meth is unbound method and obj is self. meth | self | arg1 | ... | argN @@ -2855,7 +2861,8 @@ exc_value_st = stack_pointer[-2]; PyObject *exc_value = PyStackRef_AsPyObjectBorrow(exc_value_st); PyObject *match_type = PyStackRef_AsPyObjectBorrow(match_type_st); - if (_PyEval_CheckExceptStarTypeValid(tstate, match_type) < 0) { + int err = _PyEval_CheckExceptStarTypeValid(tstate, match_type); + if (err < 0) { PyStackRef_CLOSE(exc_value_st); PyStackRef_CLOSE(match_type_st); if (true) JUMP_TO_ERROR(); @@ -3101,7 +3108,8 @@ PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); if (next_o == NULL) { if (_PyErr_Occurred(tstate)) { - if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); + if (!matches) { JUMP_TO_ERROR(); } _PyEval_MonitorRaise(tstate, frame, frame->instr_ptr); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 879c40ab0cb6ba..31490960d3828a 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2465,7 +2465,8 @@ exc_value_st = stack_pointer[-2]; PyObject *exc_value = PyStackRef_AsPyObjectBorrow(exc_value_st); PyObject *match_type = PyStackRef_AsPyObjectBorrow(match_type_st); - if (_PyEval_CheckExceptStarTypeValid(tstate, match_type) < 0) { + int err = _PyEval_CheckExceptStarTypeValid(tstate, match_type); + if (err < 0) { PyStackRef_CLOSE(exc_value_st); PyStackRef_CLOSE(match_type_st); if (true) goto pop_2_error; @@ -2528,7 +2529,8 @@ PyObject *exc_value = PyStackRef_AsPyObjectBorrow(exc_value_st); assert(throwflag); assert(exc_value && PyExceptionInstance_Check(exc_value)); - if (PyErr_GivenExceptionMatches(exc_value, PyExc_StopIteration)) { + int matches = PyErr_GivenExceptionMatches(exc_value, PyExc_StopIteration); + if (matches) { value = PyStackRef_FromPyObjectNew(((PyStopIterationObject *)exc_value)->value); PyStackRef_CLOSE(sub_iter_st); PyStackRef_CLOSE(last_sent_val_st); @@ -2982,7 +2984,8 @@ PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); PyObject *dict_o = PyStackRef_AsPyObjectBorrow(dict); 
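
Mechanically, every hunk in this patch performs the same rewrite: an "escaping" call is hoisted out of an if condition into a named temporary, so the code generator can later wrap the call site in stack-spilling code. A schematic before/after, with do_call() as a hypothetical stand-in for any escaping call:

    #include <Python.h>

    static int do_call(PyObject *obj) { (void)obj; return 0; }  /* stub */

    /* before: the escaping call is a subexpression of the condition,
     * so the generator cannot emit spill code around it */
    static int
    before(PyObject *obj)
    {
        if (do_call(obj) < 0) {
            return -1;
        }
        return 0;
    }

    /* after: the call is a statement of its own; only a plain
     * temporary appears in the condition */
    static int
    after(PyObject *obj)
    {
        int err = do_call(obj);
        if (err < 0) {
            return -1;
        }
        return 0;
    }
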
PyObject *update_o = PyStackRef_AsPyObjectBorrow(update); - if (_PyDict_MergeEx(dict_o, update_o, 2) < 0) { + int err = _PyDict_MergeEx(dict_o, update_o, 2); + if (err < 0) { _PyEval_FormatKwargsError(tstate, callable_o, update_o); PyStackRef_CLOSE(update); if (true) goto pop_1_error; @@ -3003,8 +3006,10 @@ dict = stack_pointer[-2 - (oparg - 1)]; PyObject *dict_o = PyStackRef_AsPyObjectBorrow(dict); PyObject *update_o = PyStackRef_AsPyObjectBorrow(update); - if (PyDict_Update(dict_o, update_o) < 0) { - if (_PyErr_ExceptionMatches(tstate, PyExc_AttributeError)) { + int err = PyDict_Update(dict_o, update_o); + if (err < 0) { + int matches = _PyErr_ExceptionMatches(tstate, PyExc_AttributeError); + if (matches) { _PyErr_Format(tstate, PyExc_TypeError, "'%.200s' object is not a mapping", Py_TYPE(update_o)->tp_name); @@ -3208,7 +3213,8 @@ if (next_o == NULL) { next = PyStackRef_NULL; if (_PyErr_Occurred(tstate)) { - if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); + if (!matches) { goto error; } _PyEval_MonitorRaise(tstate, frame, this_instr); @@ -3786,7 +3792,8 @@ /* Need to create a fake StopIteration error here, * to conform to PEP 380 */ if (PyStackRef_GenCheck(receiver)) { - if (monitor_stop_iteration(tstate, frame, this_instr, PyStackRef_AsPyObjectBorrow(value))) { + int err = monitor_stop_iteration(tstate, frame, this_instr, PyStackRef_AsPyObjectBorrow(value)); + if (err) { goto error; } } @@ -3807,7 +3814,8 @@ receiver = stack_pointer[-2]; PyObject *receiver_o = PyStackRef_AsPyObjectBorrow(receiver); if (PyGen_Check(receiver_o) || PyCoro_CheckExact(receiver_o)) { - if (monitor_stop_iteration(tstate, frame, this_instr, PyStackRef_AsPyObjectBorrow(value))) { + int err = monitor_stop_iteration(tstate, frame, this_instr, PyStackRef_AsPyObjectBorrow(value)); + if (err) { goto error; } } @@ -3834,7 +3842,8 @@ } else { if (_PyErr_Occurred(tstate)) { - if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); + if (!matches) { goto error; } _PyEval_MonitorRaise(tstate, frame, this_instr); @@ -4327,7 +4336,8 @@ PyObject *iterable = PyStackRef_AsPyObjectBorrow(iterable_st); PyObject *none_val = _PyList_Extend((PyListObject *)list, iterable); if (none_val == NULL) { - if (_PyErr_ExceptionMatches(tstate, PyExc_TypeError) && + int matches = _PyErr_ExceptionMatches(tstate, PyExc_TypeError); + if (matches && (Py_TYPE(iterable)->tp_iter == NULL && !PySequence_Check(iterable))) { _PyErr_Clear(tstate); @@ -4379,7 +4389,8 @@ if (oparg & 1) { /* Designed to work in tandem with CALL, pushes two values. */ attr_o = NULL; - if (_PyObject_GetMethod(PyStackRef_AsPyObjectBorrow(owner), name, &attr_o)) { + int is_meth = _PyObject_GetMethod(PyStackRef_AsPyObjectBorrow(owner), name, &attr_o); + if (is_meth) { /* We can bypass temporary bound method object. meth is unbound method and obj is self. meth | self | arg1 | ... 
| argN @@ -5074,7 +5085,8 @@ assert(class_dict); assert(oparg >= 0 && oparg < _PyFrame_GetCode(frame)->co_nlocalsplus); name = PyTuple_GET_ITEM(_PyFrame_GetCode(frame)->co_localsplusnames, oparg); - if (PyMapping_GetOptionalItem(class_dict, name, &value_o) < 0) { + int err = PyMapping_GetOptionalItem(class_dict, name, &value_o); + if (err < 0) { goto error; } if (!value_o) { @@ -5100,7 +5112,8 @@ mod_or_class_dict = stack_pointer[-1]; PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); PyObject *v_o; - if (PyMapping_GetOptionalItem(PyStackRef_AsPyObjectBorrow(mod_or_class_dict), name, &v_o) < 0) { + int err = PyMapping_GetOptionalItem(PyStackRef_AsPyObjectBorrow(mod_or_class_dict), name, &v_o); + if (err < 0) { goto error; } if (v_o == NULL) { @@ -6080,11 +6093,12 @@ PyStackRef_AsPyObjectBorrow(v)); } if (retval_o == NULL) { - if (_PyErr_ExceptionMatches(tstate, PyExc_StopIteration) - ) { + int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); + if (matches) { _PyEval_MonitorRaise(tstate, frame, this_instr); } - if (_PyGen_FetchStopIterationValue(&retval_o) == 0) { + int err = _PyGen_FetchStopIterationValue(&retval_o); + if (err == 0) { assert(retval_o != NULL); JUMPBY(oparg); } From ce0d66c8d238c9676c6ecd3f04294a3299e07f74 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Tue, 6 Aug 2024 13:29:57 +0200 Subject: [PATCH 15/23] gh-122581: Avoid data races when collecting parser statistics (#122694) --- Include/internal/pycore_parser.h | 19 +++++++++++++++++++ Parser/pegen.c | 17 +++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/Include/internal/pycore_parser.h b/Include/internal/pycore_parser.h index 067b34c12c4e7f..b16084aaa15515 100644 --- a/Include/internal/pycore_parser.h +++ b/Include/internal/pycore_parser.h @@ -21,6 +21,9 @@ extern "C" { struct _parser_runtime_state { #ifdef Py_DEBUG long memo_statistics[_PYPEGEN_NSTATISTICS]; +#ifdef Py_GIL_DISABLED + PyMutex mutex; +#endif #else int _not_used; #endif @@ -28,8 +31,10 @@ struct _parser_runtime_state { }; _Py_DECLARE_STR(empty, "") +#if defined(Py_DEBUG) && defined(Py_GIL_DISABLED) #define _parser_runtime_state_INIT \ { \ + .mutex = {0}, \ .dummy_name = { \ .kind = Name_kind, \ .v.Name.id = &_Py_STR(empty), \ @@ -40,6 +45,20 @@ _Py_DECLARE_STR(empty, "") .end_col_offset = 0, \ }, \ } +#else +#define _parser_runtime_state_INIT \ + { \ + .dummy_name = { \ + .kind = Name_kind, \ + .v.Name.id = &_Py_STR(empty), \ + .v.Name.ctx = Load, \ + .lineno = 1, \ + .col_offset = 0, \ + .end_lineno = 1, \ + .end_col_offset = 0, \ + }, \ + } +#endif extern struct _mod* _PyParser_ASTFromString( const char *str, diff --git a/Parser/pegen.c b/Parser/pegen.c index ac428be0958bdf..0c3c4689dd7ce6 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -296,12 +296,22 @@ _PyPegen_fill_token(Parser *p) #define NSTATISTICS _PYPEGEN_NSTATISTICS #define memo_statistics _PyRuntime.parser.memo_statistics +#ifdef Py_GIL_DISABLED +#define MUTEX_LOCK() PyMutex_Lock(&_PyRuntime.parser.mutex) +#define MUTEX_UNLOCK() PyMutex_Unlock(&_PyRuntime.parser.mutex) +#else +#define MUTEX_LOCK() +#define MUTEX_UNLOCK() +#endif + void _PyPegen_clear_memo_statistics(void) { + MUTEX_LOCK(); for (int i = 0; i < NSTATISTICS; i++) { memo_statistics[i] = 0; } + MUTEX_UNLOCK(); } PyObject * @@ -311,18 +321,23 @@ _PyPegen_get_memo_statistics(void) if (ret == NULL) { return NULL; } + + MUTEX_LOCK(); for (int i = 0; i < NSTATISTICS; i++) { PyObject *value = PyLong_FromLong(memo_statistics[i]); if (value == NULL) { + MUTEX_UNLOCK(); Py_DECREF(ret); return NULL; } 
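
The locking added in pegen.c above follows a small, reusable shape: a PyMutex guards the shared counters only on free-threaded builds, and the lock macros compile away otherwise. A standalone sketch under that assumption (3.13+ headers providing PyMutex; all names here are illustrative):

    #include <Python.h>

    #ifdef Py_GIL_DISABLED
    static PyMutex stats_mutex;   /* zero-initialized is valid */
    #  define STATS_LOCK()   PyMutex_Lock(&stats_mutex)
    #  define STATS_UNLOCK() PyMutex_Unlock(&stats_mutex)
    #else
    #  define STATS_LOCK()
    #  define STATS_UNLOCK()
    #endif

    static long stats[64];

    static void
    bump_stat(int type, long count)
    {
        STATS_LOCK();
        stats[type] += count;  /* read-modify-write, now race-free */
        STATS_UNLOCK();
    }
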
// PyList_SetItem borrows a reference to value. if (PyList_SetItem(ret, i, value) < 0) { + MUTEX_UNLOCK(); Py_DECREF(ret); return NULL; } } + MUTEX_UNLOCK(); return ret; } #endif @@ -348,7 +363,9 @@ _PyPegen_is_memoized(Parser *p, int type, void *pres) if (count <= 0) { count = 1; } + MUTEX_LOCK(); memo_statistics[type] += count; + MUTEX_UNLOCK(); } #endif p->mark = m->mark; From fbfab4f88cf061ce4b8ec4d876d247221326d8fa Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 6 Aug 2024 13:04:33 +0100 Subject: [PATCH 16/23] GH-120024: Refactor code generators to uses classes for emitting code. (GH-122730) --- Tools/cases_generator/generators_common.py | 298 ++++++++++--------- Tools/cases_generator/optimizer_generator.py | 14 +- Tools/cases_generator/tier1_generator.py | 37 +-- Tools/cases_generator/tier2_generator.py | 197 ++++++------ 4 files changed, 272 insertions(+), 274 deletions(-) diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index ab8c99f1e25f97..2a339f8cd6bb66 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -57,169 +57,171 @@ def emit_to(out: CWriter, tkn_iter: Iterator[Token], end: str) -> None: parens -= 1 out.emit(tkn) +ReplacementFunctionType = Callable[ + [Token, Iterator[Token], Uop, Stack, Instruction | None], None +] -def replace_deopt( - out: CWriter, - tkn: Token, - tkn_iter: Iterator[Token], - uop: Uop, - unused: Stack, - inst: Instruction | None, -) -> None: - out.emit_at("DEOPT_IF", tkn) - out.emit(next(tkn_iter)) - emit_to(out, tkn_iter, "RPAREN") - next(tkn_iter) # Semi colon - out.emit(", ") - assert inst is not None - assert inst.family is not None - out.emit(inst.family.name) - out.emit(");\n") - +class Emitter: -def replace_error( - out: CWriter, - tkn: Token, - tkn_iter: Iterator[Token], - uop: Uop, - stack: Stack, - inst: Instruction | None, -) -> None: - out.emit_at("if ", tkn) - out.emit(next(tkn_iter)) - emit_to(out, tkn_iter, "COMMA") - label = next(tkn_iter).text - next(tkn_iter) # RPAREN - next(tkn_iter) # Semi colon - out.emit(") ") - c_offset = stack.peek_offset() - try: - offset = -int(c_offset) - except ValueError: - offset = -1 - if offset > 0: - out.emit(f"goto pop_{offset}_") - out.emit(label) - out.emit(";\n") - elif offset == 0: - out.emit("goto ") - out.emit(label) - out.emit(";\n") - else: - out.emit("{\n") - stack.flush_locally(out) - out.emit("goto ") - out.emit(label) - out.emit(";\n") - out.emit("}\n") + out: CWriter + _replacers: dict[str, ReplacementFunctionType] + def __init__(self, out: CWriter): + self._replacers = { + "EXIT_IF": self.exit_if, + "DEOPT_IF": self.deopt_if, + "ERROR_IF": self.error_if, + "ERROR_NO_POP": self.error_no_pop, + "DECREF_INPUTS": self.decref_inputs, + "CHECK_EVAL_BREAKER": self.check_eval_breaker, + "SYNC_SP": self.sync_sp, + } + self.out = out -def replace_error_no_pop( - out: CWriter, - tkn: Token, - tkn_iter: Iterator[Token], - uop: Uop, - stack: Stack, - inst: Instruction | None, -) -> None: - next(tkn_iter) # LPAREN - next(tkn_iter) # RPAREN - next(tkn_iter) # Semi colon - out.emit_at("goto error;", tkn) + def deopt_if( + self, + tkn: Token, + tkn_iter: Iterator[Token], + uop: Uop, + unused: Stack, + inst: Instruction | None, + ) -> None: + self.out.emit_at("DEOPT_IF", tkn) + self.out.emit(next(tkn_iter)) + emit_to(self.out, tkn_iter, "RPAREN") + next(tkn_iter) # Semi colon + self.out.emit(", ") + assert inst is not None + assert inst.family is not None + self.out.emit(inst.family.name) + 
self.out.emit(");\n") + exit_if = deopt_if -def replace_decrefs( - out: CWriter, - tkn: Token, - tkn_iter: Iterator[Token], - uop: Uop, - stack: Stack, - inst: Instruction | None, -) -> None: - next(tkn_iter) - next(tkn_iter) - next(tkn_iter) - out.emit_at("", tkn) - for var in uop.stack.inputs: - if var.name == "unused" or var.name == "null" or var.peek: - continue - if var.size: - out.emit(f"for (int _i = {var.size}; --_i >= 0;) {{\n") - out.emit(f"PyStackRef_CLOSE({var.name}[_i]);\n") - out.emit("}\n") - elif var.condition: - if var.condition == "1": - out.emit(f"PyStackRef_CLOSE({var.name});\n") - elif var.condition != "0": - out.emit(f"PyStackRef_XCLOSE({var.name});\n") + def error_if( + self, + tkn: Token, + tkn_iter: Iterator[Token], + uop: Uop, + stack: Stack, + inst: Instruction | None, + ) -> None: + self.out.emit_at("if ", tkn) + self.out.emit(next(tkn_iter)) + emit_to(self.out, tkn_iter, "COMMA") + label = next(tkn_iter).text + next(tkn_iter) # RPAREN + next(tkn_iter) # Semi colon + self.out.emit(") ") + c_offset = stack.peek_offset() + try: + offset = -int(c_offset) + except ValueError: + offset = -1 + if offset > 0: + self.out.emit(f"goto pop_{offset}_") + self.out.emit(label) + self.out.emit(";\n") + elif offset == 0: + self.out.emit("goto ") + self.out.emit(label) + self.out.emit(";\n") else: - out.emit(f"PyStackRef_CLOSE({var.name});\n") + self.out.emit("{\n") + stack.flush_locally(self.out) + self.out.emit("goto ") + self.out.emit(label) + self.out.emit(";\n") + self.out.emit("}\n") + def error_no_pop( + self, + tkn: Token, + tkn_iter: Iterator[Token], + uop: Uop, + stack: Stack, + inst: Instruction | None, + ) -> None: + next(tkn_iter) # LPAREN + next(tkn_iter) # RPAREN + next(tkn_iter) # Semi colon + self.out.emit_at("goto error;", tkn) -def replace_sync_sp( - out: CWriter, - tkn: Token, - tkn_iter: Iterator[Token], - uop: Uop, - stack: Stack, - inst: Instruction | None, -) -> None: - next(tkn_iter) - next(tkn_iter) - next(tkn_iter) - stack.flush(out) - + def decref_inputs( + self, + tkn: Token, + tkn_iter: Iterator[Token], + uop: Uop, + stack: Stack, + inst: Instruction | None, + ) -> None: + next(tkn_iter) + next(tkn_iter) + next(tkn_iter) + self.out.emit_at("", tkn) + for var in uop.stack.inputs: + if var.name == "unused" or var.name == "null" or var.peek: + continue + if var.size: + self.out.emit(f"for (int _i = {var.size}; --_i >= 0;) {{\n") + self.out.emit(f"PyStackRef_CLOSE({var.name}[_i]);\n") + self.out.emit("}\n") + elif var.condition: + if var.condition == "1": + self.out.emit(f"PyStackRef_CLOSE({var.name});\n") + elif var.condition != "0": + self.out.emit(f"PyStackRef_XCLOSE({var.name});\n") + else: + self.out.emit(f"PyStackRef_CLOSE({var.name});\n") -def replace_check_eval_breaker( - out: CWriter, - tkn: Token, - tkn_iter: Iterator[Token], - uop: Uop, - stack: Stack, - inst: Instruction | None, -) -> None: - next(tkn_iter) - next(tkn_iter) - next(tkn_iter) - if not uop.properties.ends_with_eval_breaker: - out.emit_at("CHECK_EVAL_BREAKER();", tkn) + def sync_sp( + self, + tkn: Token, + tkn_iter: Iterator[Token], + uop: Uop, + stack: Stack, + inst: Instruction | None, + ) -> None: + next(tkn_iter) + next(tkn_iter) + next(tkn_iter) + stack.flush(self.out) -REPLACEMENT_FUNCTIONS = { - "EXIT_IF": replace_deopt, - "DEOPT_IF": replace_deopt, - "ERROR_IF": replace_error, - "ERROR_NO_POP": replace_error_no_pop, - "DECREF_INPUTS": replace_decrefs, - "CHECK_EVAL_BREAKER": replace_check_eval_breaker, - "SYNC_SP": replace_sync_sp, -} - -ReplacementFunctionType = 
Callable[ - [CWriter, Token, Iterator[Token], Uop, Stack, Instruction | None], None -] + def check_eval_breaker( + self, + tkn: Token, + tkn_iter: Iterator[Token], + uop: Uop, + stack: Stack, + inst: Instruction | None, + ) -> None: + next(tkn_iter) + next(tkn_iter) + next(tkn_iter) + if not uop.properties.ends_with_eval_breaker: + self.out.emit_at("CHECK_EVAL_BREAKER();", tkn) -def emit_tokens( - out: CWriter, - uop: Uop, - stack: Stack, - inst: Instruction | None, - replacement_functions: Mapping[ - str, ReplacementFunctionType - ] = REPLACEMENT_FUNCTIONS, -) -> None: - tkns = uop.body[1:-1] - if not tkns: - return - tkn_iter = iter(tkns) - out.start_line() - for tkn in tkn_iter: - if tkn.kind == "IDENTIFIER" and tkn.text in replacement_functions: - replacement_functions[tkn.text](out, tkn, tkn_iter, uop, stack, inst) - else: - out.emit(tkn) + def emit_tokens( + self, + uop: Uop, + stack: Stack, + inst: Instruction | None, + ) -> None: + tkns = uop.body[1:-1] + if not tkns: + return + tkn_iter = iter(tkns) + self.out.start_line() + for tkn in tkn_iter: + if tkn.kind == "IDENTIFIER" and tkn.text in self._replacers: + self._replacers[tkn.text](tkn, tkn_iter, uop, stack, inst) + else: + self.out.emit(tkn) + def emit(self, txt: str | Token) -> None: + self.out.emit(txt) def cflags(p: Properties) -> str: flags: list[str] = [] diff --git a/Tools/cases_generator/optimizer_generator.py b/Tools/cases_generator/optimizer_generator.py index f6c2fea40f0dbb..e192b76b23319c 100644 --- a/Tools/cases_generator/optimizer_generator.py +++ b/Tools/cases_generator/optimizer_generator.py @@ -17,8 +17,7 @@ DEFAULT_INPUT, ROOT, write_header, - emit_tokens, - replace_sync_sp, + Emitter, ) from cwriter import CWriter from typing import TextIO, Iterator @@ -89,6 +88,10 @@ def emit_default(out: CWriter, uop: Uop) -> None: else: out.emit(f"{var.name} = sym_new_not_null(ctx);\n") +class OptimizerEmitter(Emitter): + + pass + def write_uop( override: Uop | None, @@ -126,11 +129,8 @@ def write_uop( cast = f"uint{cache.size*16}_t" out.emit(f"{type}{cache.name} = ({cast})this_instr->operand;\n") if override: - replacement_funcs = { - "DECREF_INPUTS": decref_inputs, - "SYNC_SP": replace_sync_sp, - } - emit_tokens(out, override, stack, None, replacement_funcs) + emitter = OptimizerEmitter(out) + emitter.emit_tokens(override, stack, None) else: emit_default(out, uop) diff --git a/Tools/cases_generator/tier1_generator.py b/Tools/cases_generator/tier1_generator.py index 1cdafbd35caea3..6c13d1f10b39f9 100644 --- a/Tools/cases_generator/tier1_generator.py +++ b/Tools/cases_generator/tier1_generator.py @@ -20,8 +20,8 @@ DEFAULT_INPUT, ROOT, write_header, - emit_tokens, type_and_null, + Emitter, ) from cwriter import CWriter from typing import TextIO @@ -62,26 +62,26 @@ def declare_variables(inst: Instruction, out: CWriter) -> None: declare_variable(var, out) def write_uop( - uop: Part, out: CWriter, offset: int, stack: Stack, inst: Instruction, braces: bool + uop: Part, emitter: Emitter, offset: int, stack: Stack, inst: Instruction, braces: bool ) -> int: # out.emit(stack.as_comment() + "\n") if isinstance(uop, Skip): entries = "entries" if uop.size > 1 else "entry" - out.emit(f"/* Skip {uop.size} cache {entries} */\n") + emitter.emit(f"/* Skip {uop.size} cache {entries} */\n") return offset + uop.size if isinstance(uop, Flush): - out.emit(f"// flush\n") - stack.flush(out) + emitter.emit(f"// flush\n") + stack.flush(emitter.out) return offset try: locals: dict[str, Local] = {} - out.start_line() + emitter.out.start_line() if 
braces: - out.emit(f"// {uop.name}\n") + emitter.out.emit(f"// {uop.name}\n") peeks: list[Local] = [] for var in reversed(uop.stack.inputs): code, local = stack.pop(var) - out.emit(code) + emitter.emit(code) if var.peek: peeks.append(local) if local.defined: @@ -91,8 +91,8 @@ def write_uop( while peeks: stack.push(peeks.pop()) if braces: - out.emit("{\n") - out.emit(stack.define_output_arrays(uop.stack.outputs)) + emitter.emit("{\n") + emitter.out.emit(stack.define_output_arrays(uop.stack.outputs)) for cache in uop.caches: if cache.name != "unused": @@ -102,13 +102,13 @@ def write_uop( else: type = f"uint{cache.size*16}_t " reader = f"read_u{cache.size*16}" - out.emit( + emitter.emit( f"{type}{cache.name} = {reader}(&this_instr[{offset}].cache);\n" ) if inst.family is None: - out.emit(f"(void){cache.name};\n") + emitter.emit(f"(void){cache.name};\n") offset += cache.size - emit_tokens(out, uop, stack, inst) + emitter.emit_tokens(uop, stack, inst) for i, var in enumerate(uop.stack.outputs): if not var.peek: if var.name in locals: @@ -117,11 +117,11 @@ def write_uop( local = Local.unused(var) else: local = Local.local(var) - out.emit(stack.push(local)) + emitter.emit(stack.push(local)) if braces: - out.start_line() - out.emit("}\n") - # out.emit(stack.as_comment() + "\n") + emitter.out.start_line() + emitter.emit("}\n") + # emitter.emit(stack.as_comment() + "\n") return offset except StackError as ex: raise analysis_error(ex.args[0], uop.body[0]) @@ -152,6 +152,7 @@ def generate_tier1( """ ) out = CWriter(outfile, 2, lines) + emitter = Emitter(out) out.emit("\n") for name, inst in sorted(analysis.instructions.items()): needs_this = uses_this(inst) @@ -183,7 +184,7 @@ def generate_tier1( for part in inst.parts: # Only emit braces if more than one uop insert_braces = len([p for p in inst.parts if isinstance(p, Uop)]) > 1 - offset = write_uop(part, out, offset, stack, inst, insert_braces) + offset = write_uop(part, emitter, offset, stack, inst, insert_braces) out.start_line() if not inst.parts[-1].properties.always_exits: stack.flush(out) diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py index 18bab2c13e7eb7..8c212f75878984 100644 --- a/Tools/cases_generator/tier2_generator.py +++ b/Tools/cases_generator/tier2_generator.py @@ -16,11 +16,10 @@ from generators_common import ( DEFAULT_INPUT, ROOT, - write_header, - emit_tokens, emit_to, - REPLACEMENT_FUNCTIONS, + write_header, type_and_null, + Emitter ) from cwriter import CWriter from typing import TextIO, Iterator @@ -61,117 +60,112 @@ def declare_variables(uop: Uop, out: CWriter) -> None: for var in uop.stack.outputs: declare_variable(var, uop, required, out) -def tier2_replace_error( - out: CWriter, - tkn: Token, - tkn_iter: Iterator[Token], - uop: Uop, - stack: Stack, - inst: Instruction | None, -) -> None: - out.emit_at("if ", tkn) - out.emit(next(tkn_iter)) - emit_to(out, tkn_iter, "COMMA") - label = next(tkn_iter).text - next(tkn_iter) # RPAREN - next(tkn_iter) # Semi colon - out.emit(") JUMP_TO_ERROR();\n") +class Tier2Emitter(Emitter): -def tier2_replace_error_no_pop( - out: CWriter, - tkn: Token, - tkn_iter: Iterator[Token], - uop: Uop, - stack: Stack, - inst: Instruction | None, -) -> None: - next(tkn_iter) # LPAREN - next(tkn_iter) # RPAREN - next(tkn_iter) # Semi colon - out.emit_at("JUMP_TO_ERROR();", tkn) + def __init__(self, out: CWriter): + super().__init__(out) + self._replacers["oparg"] = self.oparg -def tier2_replace_deopt( - out: CWriter, - tkn: Token, - tkn_iter: 
Iterator[Token], - uop: Uop, - unused: Stack, - inst: Instruction | None, -) -> None: - out.emit_at("if ", tkn) - out.emit(next(tkn_iter)) - emit_to(out, tkn_iter, "RPAREN") - next(tkn_iter) # Semi colon - out.emit(") {\n") - out.emit("UOP_STAT_INC(uopcode, miss);\n") - out.emit("JUMP_TO_JUMP_TARGET();\n"); - out.emit("}\n") + def error_if( + self, + tkn: Token, + tkn_iter: Iterator[Token], + uop: Uop, + stack: Stack, + inst: Instruction | None, + ) -> None: + self.out.emit_at("if ", tkn) + self.emit(next(tkn_iter)) + emit_to(self.out, tkn_iter, "COMMA") + label = next(tkn_iter).text + next(tkn_iter) # RPAREN + next(tkn_iter) # Semi colon + self.emit(") JUMP_TO_ERROR();\n") + def error_no_pop( + self, + tkn: Token, + tkn_iter: Iterator[Token], + uop: Uop, + stack: Stack, + inst: Instruction | None, + ) -> None: + next(tkn_iter) # LPAREN + next(tkn_iter) # RPAREN + next(tkn_iter) # Semi colon + self.out.emit_at("JUMP_TO_ERROR();", tkn) -def tier2_replace_exit_if( - out: CWriter, - tkn: Token, - tkn_iter: Iterator[Token], - uop: Uop, - unused: Stack, - inst: Instruction | None, -) -> None: - out.emit_at("if ", tkn) - out.emit(next(tkn_iter)) - emit_to(out, tkn_iter, "RPAREN") - next(tkn_iter) # Semi colon - out.emit(") {\n") - out.emit("UOP_STAT_INC(uopcode, miss);\n") - out.emit("JUMP_TO_JUMP_TARGET();\n") - out.emit("}\n") + def deopt_if( + self, + tkn: Token, + tkn_iter: Iterator[Token], + uop: Uop, + unused: Stack, + inst: Instruction | None, + ) -> None: + self.out.emit_at("if ", tkn) + self.emit(next(tkn_iter)) + emit_to(self.out, tkn_iter, "RPAREN") + next(tkn_iter) # Semi colon + self.emit(") {\n") + self.emit("UOP_STAT_INC(uopcode, miss);\n") + self.emit("JUMP_TO_JUMP_TARGET();\n"); + self.emit("}\n") + def exit_if( # type: ignore[override] + self, + tkn: Token, + tkn_iter: Iterator[Token], + uop: Uop, + unused: Stack, + inst: Instruction | None, + ) -> None: + self.out.emit_at("if ", tkn) + self.emit(next(tkn_iter)) + emit_to(self.out, tkn_iter, "RPAREN") + next(tkn_iter) # Semi colon + self.emit(") {\n") + self.emit("UOP_STAT_INC(uopcode, miss);\n") + self.emit("JUMP_TO_JUMP_TARGET();\n") + self.emit("}\n") -def tier2_replace_oparg( - out: CWriter, - tkn: Token, - tkn_iter: Iterator[Token], - uop: Uop, - unused: Stack, - inst: Instruction | None, -) -> None: - if not uop.name.endswith("_0") and not uop.name.endswith("_1"): - out.emit(tkn) - return - amp = next(tkn_iter) - if amp.text != "&": - out.emit(tkn) - out.emit(amp) - return - one = next(tkn_iter) - assert one.text == "1" - out.emit_at(uop.name[-1], tkn) - + def oparg( + self, + tkn: Token, + tkn_iter: Iterator[Token], + uop: Uop, + unused: Stack, + inst: Instruction | None, + ) -> None: + if not uop.name.endswith("_0") and not uop.name.endswith("_1"): + self.emit(tkn) + return + amp = next(tkn_iter) + if amp.text != "&": + self.emit(tkn) + self.emit(amp) + return + one = next(tkn_iter) + assert one.text == "1" + self.out.emit_at(uop.name[-1], tkn) -TIER2_REPLACEMENT_FUNCTIONS = REPLACEMENT_FUNCTIONS.copy() -TIER2_REPLACEMENT_FUNCTIONS["ERROR_IF"] = tier2_replace_error -TIER2_REPLACEMENT_FUNCTIONS["ERROR_NO_POP"] = tier2_replace_error_no_pop -TIER2_REPLACEMENT_FUNCTIONS["DEOPT_IF"] = tier2_replace_deopt -TIER2_REPLACEMENT_FUNCTIONS["oparg"] = tier2_replace_oparg -TIER2_REPLACEMENT_FUNCTIONS["EXIT_IF"] = tier2_replace_exit_if - - -def write_uop(uop: Uop, out: CWriter, stack: Stack) -> None: +def write_uop(uop: Uop, emitter: Emitter, stack: Stack) -> None: locals: dict[str, Local] = {} try: - out.start_line() + 
emitter.out.start_line() if uop.properties.oparg: - out.emit("oparg = CURRENT_OPARG();\n") + emitter.emit("oparg = CURRENT_OPARG();\n") assert uop.properties.const_oparg < 0 elif uop.properties.const_oparg >= 0: - out.emit(f"oparg = {uop.properties.const_oparg};\n") - out.emit(f"assert(oparg == CURRENT_OPARG());\n") + emitter.emit(f"oparg = {uop.properties.const_oparg};\n") + emitter.emit(f"assert(oparg == CURRENT_OPARG());\n") for var in reversed(uop.stack.inputs): code, local = stack.pop(var) - out.emit(code) + emitter.emit(code) if local.defined: locals[local.name] = local - out.emit(stack.define_output_arrays(uop.stack.outputs)) + emitter.emit(stack.define_output_arrays(uop.stack.outputs)) for cache in uop.caches: if cache.name != "unused": if cache.size == 4: @@ -179,14 +173,14 @@ def write_uop(uop: Uop, out: CWriter, stack: Stack) -> None: else: type = f"uint{cache.size*16}_t " cast = f"uint{cache.size*16}_t" - out.emit(f"{type}{cache.name} = ({cast})CURRENT_OPERAND();\n") - emit_tokens(out, uop, stack, None, TIER2_REPLACEMENT_FUNCTIONS) + emitter.emit(f"{type}{cache.name} = ({cast})CURRENT_OPERAND();\n") + emitter.emit_tokens(uop, stack, None) for i, var in enumerate(uop.stack.outputs): if var.name in locals: local = locals[var.name] else: local = Local.local(var) - out.emit(stack.push(local)) + emitter.emit(stack.push(local)) except StackError as ex: raise analysis_error(ex.args[0], uop.body[0]) from None @@ -207,6 +201,7 @@ def generate_tier2( """ ) out = CWriter(outfile, 2, lines) + emitter = Tier2Emitter(out) out.emit("\n") for name, uop in analysis.uops.items(): if uop.properties.tier == 1: @@ -223,7 +218,7 @@ def generate_tier2( out.emit(f"case {uop.name}: {{\n") declare_variables(uop, out) stack = Stack() - write_uop(uop, out, stack) + write_uop(uop, emitter, stack) out.start_line() if not uop.properties.always_exits: stack.flush(out) From 6ff82fdb56fa0381f94c7a45aa67ab4c4aa71930 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Tue, 6 Aug 2024 15:37:07 +0300 Subject: [PATCH 17/23] gh-122686: bump hypothesis from 6.104.2 to 6.108.10 in Tools (#122729) This drops attrs pinning. --- Tools/requirements-hypothesis.txt | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Tools/requirements-hypothesis.txt b/Tools/requirements-hypothesis.txt index 03f955ba8bf310..ca872b024a7179 100644 --- a/Tools/requirements-hypothesis.txt +++ b/Tools/requirements-hypothesis.txt @@ -1,7 +1,4 @@ # Requirements file for hypothesis that # we use to run our property-based tests in CI. -# see https://github.com/python/cpython/issues/122686 -attrs<=23.2.0 - -hypothesis==6.104.2 +hypothesis==6.108.10 From 0b433aa9df6b5bb84e77ff97e59b7bcd04f2199a Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Tue, 6 Aug 2024 15:43:13 +0300 Subject: [PATCH 18/23] gh-122681: merge m_atan2() and c_atan2() helper functions (#122682) --- Modules/_math.h | 39 +++++++++++++++++++++++++++++++++++++++ Modules/cmathmodule.c | 34 ++-------------------------------- Modules/mathmodule.c | 37 ------------------------------------- 3 files changed, 41 insertions(+), 69 deletions(-) diff --git a/Modules/_math.h b/Modules/_math.h index 2285b64747c0bd..b8477d2752b7cc 100644 --- a/Modules/_math.h +++ b/Modules/_math.h @@ -23,3 +23,42 @@ _Py_log1p(double x) } #define m_log1p _Py_log1p + +/* + wrapper for atan2 that deals directly with special cases before + delegating to the platform libm for the remaining cases. This + is necessary to get consistent behaviour across platforms. 
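
For reference, the corner cases the merged wrapper pins down can be checked directly against a C99 libm. This throwaway test is not part of the patch; assuming a conforming libm and a POSIX M_PI, every line should print 1:

    #include <math.h>
    #include <stdio.h>

    int
    main(void)
    {
        double inf = INFINITY;
        printf("%d\n", atan2(inf, inf)  == 0.25 * M_PI);  /* +pi/4  */
        printf("%d\n", atan2(inf, -inf) == 0.75 * M_PI);  /* +3pi/4 */
        printf("%d\n", atan2(-inf, 1.0) == -0.5 * M_PI);  /* -pi/2  */
        printf("%d\n", atan2(0.0, -1.0) == M_PI);         /* +pi    */
        printf("%d\n", isnan(atan2(NAN, 1.0)));  /* nan in, nan out */
        return 0;
    }
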
+ Windows, FreeBSD and alpha Tru64 are amongst platforms that don't + always follow C99. Windows screws up atan2 for inf and nan, and + alpha Tru64 5.1 doesn't follow C99 for atan2(0., 0.). +*/ + +static double +_Py_atan2(double y, double x) +{ + if (isnan(x) || isnan(y)) + return Py_NAN; + if (isinf(y)) { + if (isinf(x)) { + if (copysign(1., x) == 1.) + /* atan2(+-inf, +inf) == +-pi/4 */ + return copysign(0.25*Py_MATH_PI, y); + else + /* atan2(+-inf, -inf) == +-pi*3/4 */ + return copysign(0.75*Py_MATH_PI, y); + } + /* atan2(+-inf, x) == +-pi/2 for finite x */ + return copysign(0.5*Py_MATH_PI, y); + } + if (isinf(x) || y == 0.) { + if (copysign(1., x) == 1.) + /* atan2(+-y, +inf) = atan2(+-0, +x) = +-0. */ + return copysign(0., y); + else + /* atan2(+-y, -inf) = atan2(+-0., -x) = +-pi. */ + return copysign(Py_MATH_PI, y); + } + return atan2(y, x); +} + +#define m_atan2 _Py_atan2 diff --git a/Modules/cmathmodule.c b/Modules/cmathmodule.c index 3c7f0bb6453ef0..71686518d84aa7 100644 --- a/Modules/cmathmodule.c +++ b/Modules/cmathmodule.c @@ -324,36 +324,6 @@ cmath_atan_impl(PyObject *module, Py_complex z) return r; } -/* Windows screws up atan2 for inf and nan, and alpha Tru64 5.1 doesn't follow - C99 for atan2(0., 0.). */ -static double -c_atan2(Py_complex z) -{ - if (isnan(z.real) || isnan(z.imag)) - return Py_NAN; - if (isinf(z.imag)) { - if (isinf(z.real)) { - if (copysign(1., z.real) == 1.) - /* atan2(+-inf, +inf) == +-pi/4 */ - return copysign(0.25*Py_MATH_PI, z.imag); - else - /* atan2(+-inf, -inf) == +-pi*3/4 */ - return copysign(0.75*Py_MATH_PI, z.imag); - } - /* atan2(+-inf, x) == +-pi/2 for finite x */ - return copysign(0.5*Py_MATH_PI, z.imag); - } - if (isinf(z.real) || z.imag == 0.) { - if (copysign(1., z.real) == 1.) - /* atan2(+-y, +inf) = atan2(+-0, +x) = +-0. */ - return copysign(0., z.imag); - else - /* atan2(+-y, -inf) = atan2(+-0., -x) = +-pi. */ - return copysign(Py_MATH_PI, z.imag); - } - return atan2(z.imag, z.real); -} - static Py_complex atanh_special_values[7][7]; @@ -966,7 +936,7 @@ cmath_phase_impl(PyObject *module, Py_complex z) double phi; errno = 0; - phi = c_atan2(z); /* should not cause any exception */ + phi = m_atan2(z.imag, z.real); /* should not cause any exception */ if (errno != 0) return math_error(); else @@ -991,7 +961,7 @@ cmath_polar_impl(PyObject *module, Py_complex z) double r, phi; errno = 0; - phi = c_atan2(z); /* should not cause any exception */ + phi = m_atan2(z.imag, z.real); /* should not cause any exception */ r = _Py_c_abs(z); /* sets errno to ERANGE on overflow */ if (errno != 0) return math_error(); diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 64dfceac8bfea3..d6d0702169e186 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -535,43 +535,6 @@ m_lgamma(double x) return r; } -/* - wrapper for atan2 that deals directly with special cases before - delegating to the platform libm for the remaining cases. This - is necessary to get consistent behaviour across platforms. - Windows, FreeBSD and alpha Tru64 are amongst platforms that don't - always follow C99. -*/ - -static double -m_atan2(double y, double x) -{ - if (isnan(x) || isnan(y)) - return Py_NAN; - if (isinf(y)) { - if (isinf(x)) { - if (copysign(1., x) == 1.) - /* atan2(+-inf, +inf) == +-pi/4 */ - return copysign(0.25*Py_MATH_PI, y); - else - /* atan2(+-inf, -inf) == +-pi*3/4 */ - return copysign(0.75*Py_MATH_PI, y); - } - /* atan2(+-inf, x) == +-pi/2 for finite x */ - return copysign(0.5*Py_MATH_PI, y); - } - if (isinf(x) || y == 0.) 
{ - if (copysign(1., x) == 1.) - /* atan2(+-y, +inf) = atan2(+-0, +x) = +-0. */ - return copysign(0., y); - else - /* atan2(+-y, -inf) = atan2(+-0., -x) = +-pi. */ - return copysign(Py_MATH_PI, y); - } - return atan2(y, x); -} - - /* IEEE 754-style remainder operation: x - n*y where n*y is the nearest multiple of y to x, taking n even in the case of a tie. Assuming an IEEE 754 binary floating-point format, the result is always exact. */ From 8ce70d6c697c8179e007169ba2ec5d3a0dc77362 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Tue, 6 Aug 2024 15:47:31 +0300 Subject: [PATCH 19/23] gh-122058: `Lib/inspect`: Update docstrings for `isfunction`, `isgenerator`, `isframe`, `iscode`. (#122059) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Victor Stinner --- Doc/library/inspect.rst | 25 +++++++++++++++++++++++++ Lib/inspect.py | 33 ++++++++++++++++++++++++--------- Objects/frameobject.c | 2 +- 3 files changed, 50 insertions(+), 10 deletions(-) diff --git a/Doc/library/inspect.rst b/Doc/library/inspect.rst index 361f4054856d89..4107907fd0ab43 100644 --- a/Doc/library/inspect.rst +++ b/Doc/library/inspect.rst @@ -153,6 +153,19 @@ attributes (see :ref:`import-mod-attrs` for module attributes): | | f_trace | tracing function for this | | | | frame, or ``None`` | +-----------------+-------------------+---------------------------+ +| | f_trace_lines | indicate whether a | +| | | tracing event is | +| | | triggered for each source | +| | | source line | ++-----------------+-------------------+---------------------------+ +| | f_trace_opcodes | indicate whether | +| | | per-opcode events are | +| | | requested | ++-----------------+-------------------+---------------------------+ +| | clear() | used to clear all | +| | | references to local | +| | | variables | ++-----------------+-------------------+---------------------------+ | code | co_argcount | number of arguments (not | | | | including keyword only | | | | arguments, \* or \*\* | @@ -214,6 +227,18 @@ attributes (see :ref:`import-mod-attrs` for module attributes): | | | arguments and local | | | | variables | +-----------------+-------------------+---------------------------+ +| | co_lines() | returns an iterator that | +| | | yields successive | +| | | bytecode ranges | ++-----------------+-------------------+---------------------------+ +| | co_positions() | returns an iterator of | +| | | source code positions for | +| | | each bytecode instruction | ++-----------------+-------------------+---------------------------+ +| | replace() | returns a copy of the | +| | | code object with new | +| | | values | ++-----------------+-------------------+---------------------------+ | generator | __name__ | name | +-----------------+-------------------+---------------------------+ | | __qualname__ | qualified name | diff --git a/Lib/inspect.py b/Lib/inspect.py index ba3ecbb87c7026..1f287842e32498 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -264,11 +264,16 @@ def isfunction(object): Function objects provide these attributes: __doc__ documentation string __name__ name with which this function was defined + __qualname__ qualified name of this function + __module__ name of the module the function was defined in or None __code__ code object containing compiled function bytecode __defaults__ tuple of any default values for arguments __globals__ global namespace in which this function was defined __annotations__ 
dict of parameter annotations - __kwdefaults__ dict of keyword only parameters with defaults""" + __kwdefaults__ dict of keyword only parameters with defaults + __dict__ namespace which is supporting arbitrary function attributes + __closure__ a tuple of cells or None + __type_params__ tuple of type parameters""" return isinstance(object, types.FunctionType) def _has_code_flag(f, flag): @@ -333,17 +338,18 @@ def isgenerator(object): """Return true if the object is a generator. Generator objects provide these attributes: - __iter__ defined to support iteration over container - close raises a new GeneratorExit exception inside the - generator to terminate the iteration gi_code code object gi_frame frame object or possibly None once the generator has been exhausted gi_running set to 1 when generator is executing, 0 otherwise - next return the next item from the container - send resumes the generator and "sends" a value that becomes + gi_yieldfrom object being iterated by yield from or None + + __iter__() defined to support iteration over container + close() raises a new GeneratorExit exception inside the + generator to terminate the iteration + send() resumes the generator and "sends" a value that becomes the result of the current yield-expression - throw used to raise an exception inside the generator""" + throw() used to raise an exception inside the generator""" return isinstance(object, types.GeneratorType) def iscoroutine(object): @@ -378,7 +384,11 @@ def isframe(object): f_lasti index of last attempted instruction in bytecode f_lineno current line number in Python source code f_locals local namespace seen by this frame - f_trace tracing function for this frame, or None""" + f_trace tracing function for this frame, or None + f_trace_lines is a tracing event triggered for each source line? + f_trace_opcodes are per-opcode events being requested? 
+ + clear() used to clear all references to local variables""" return isinstance(object, types.FrameType) def iscode(object): @@ -403,7 +413,12 @@ def iscode(object): co_names tuple of names other than arguments and function locals co_nlocals number of local variables co_stacksize virtual machine stack space required - co_varnames tuple of names of arguments and local variables""" + co_varnames tuple of names of arguments and local variables + co_qualname fully qualified function name + + co_lines() returns an iterator that yields successive bytecode ranges + co_positions() returns an iterator of source code positions for each bytecode instruction + replace() returns a copy of the code object with a new values""" return isinstance(object, types.CodeType) def isbuiltin(object): diff --git a/Objects/frameobject.c b/Objects/frameobject.c index a8be7d75371c16..4e77780eb39097 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -1721,7 +1721,7 @@ frame_clear(PyFrameObject *f, PyObject *Py_UNUSED(ignored)) } PyDoc_STRVAR(clear__doc__, -"F.clear(): clear most references held by the frame"); +"F.clear(): clear all references held by the frame"); static PyObject * frame_sizeof(PyFrameObject *f, PyObject *Py_UNUSED(ignored)) From 4c317918486348ff8486168e1003be8c1daa6cf5 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 6 Aug 2024 14:14:52 +0100 Subject: [PATCH 20/23] GH-120024: Move three more escaping calls out of conditional statements (GH-122734) --- Python/bytecodes.c | 16 ++++++++++++---- Python/executor_cases.c.h | 10 ++++++++-- Python/generated_cases.c.h | 16 ++++++++++++---- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a8527fe84b76f5..9a1af0e920188b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -191,7 +191,8 @@ dummy_func( uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK; uintptr_t code_version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version); if (code_version != global_version && tstate->tracing == 0) { - if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) { + int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp); + if (err) { ERROR_NO_POP(); } next_instr = this_instr; @@ -1802,7 +1803,12 @@ dummy_func( assert(PyDict_CheckExact(dict)); /* dict[key] = value */ // Do not DECREF INPUTS because the function steals the references - ERROR_IF(_PyDict_SetItem_Take2((PyDictObject *)dict, PyStackRef_AsPyObjectSteal(key), PyStackRef_AsPyObjectSteal(value)) != 0, error); + int err = _PyDict_SetItem_Take2( + (PyDictObject *)dict, + PyStackRef_AsPyObjectSteal(key), + PyStackRef_AsPyObjectSteal(value) + ); + ERROR_IF(err != 0, error); } inst(INSTRUMENTED_LOAD_SUPER_ATTR, (unused/1, unused, unused, unused -- unused, unused if (oparg & 1))) { @@ -2455,7 +2461,8 @@ dummy_func( PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyExceptionInstance_Check(left_o)); - if (_PyEval_CheckExceptTypeValid(tstate, right_o) < 0) { + int err = _PyEval_CheckExceptTypeValid(tstate, right_o); + if (err < 0) { DECREF_INPUTS(); ERROR_IF(true, error); } @@ -4107,7 +4114,8 @@ dummy_func( // It converts all dict subtypes in kwargs into regular dicts. 
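
The multi-line _PyDict_SetItem_Take2() call in this patch also explains its "Do not DECREF INPUTS" comment: the callee steals both the key and value references, succeed or fail. A hypothetical wrapper showing what such a "Take2" contract means in terms of the public API (set_item_take2() is illustrative, not the real internal function):

    #include <Python.h>

    /* Callee consumes key and value on success *and* on failure,
     * so callers must not DECREF them afterwards. */
    static int
    set_item_take2(PyObject *dict, PyObject *key, PyObject *value)
    {
        int err = PyDict_SetItem(dict, key, value);  /* borrows both */
        Py_DECREF(key);                              /* consume them here */
        Py_DECREF(value);
        return err;
    }
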
assert(kwargs == NULL || PyDict_CheckExact(kwargs)); if (!PyTuple_CheckExact(callargs)) { - if (check_args_iterable(tstate, func, callargs) < 0) { + int err = check_args_iterable(tstate, func, callargs); + if (err < 0) { ERROR_NO_POP(); } PyObject *tuple = PySequence_Tuple(callargs); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 7f520eb7abbe80..afc7786c9e434d 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2085,7 +2085,12 @@ assert(PyDict_CheckExact(dict)); /* dict[key] = value */ // Do not DECREF INPUTS because the function steals the references - if (_PyDict_SetItem_Take2((PyDictObject *)dict, PyStackRef_AsPyObjectSteal(key), PyStackRef_AsPyObjectSteal(value)) != 0) JUMP_TO_ERROR(); + int err = _PyDict_SetItem_Take2( + (PyDictObject *)dict, + PyStackRef_AsPyObjectSteal(key), + PyStackRef_AsPyObjectSteal(value) + ); + if (err != 0) JUMP_TO_ERROR(); stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); break; @@ -2895,7 +2900,8 @@ PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyExceptionInstance_Check(left_o)); - if (_PyEval_CheckExceptTypeValid(tstate, right_o) < 0) { + int err = _PyEval_CheckExceptTypeValid(tstate, right_o); + if (err < 0) { PyStackRef_CLOSE(right); if (true) JUMP_TO_ERROR(); } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 31490960d3828a..f670353cdbde56 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1508,7 +1508,8 @@ // It converts all dict subtypes in kwargs into regular dicts. assert(kwargs == NULL || PyDict_CheckExact(kwargs)); if (!PyTuple_CheckExact(callargs)) { - if (check_args_iterable(tstate, func, callargs) < 0) { + int err = check_args_iterable(tstate, func, callargs); + if (err < 0) { goto error; } PyObject *tuple = PySequence_Tuple(callargs); @@ -2502,7 +2503,8 @@ PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyExceptionInstance_Check(left_o)); - if (_PyEval_CheckExceptTypeValid(tstate, right_o) < 0) { + int err = _PyEval_CheckExceptTypeValid(tstate, right_o); + if (err < 0) { PyStackRef_CLOSE(right); if (true) goto pop_1_error; } @@ -4033,7 +4035,8 @@ uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK; uintptr_t code_version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version); if (code_version != global_version && tstate->tracing == 0) { - if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) { + int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp); + if (err) { goto error; } next_instr = this_instr; @@ -5546,7 +5549,12 @@ assert(PyDict_CheckExact(dict)); /* dict[key] = value */ // Do not DECREF INPUTS because the function steals the references - if (_PyDict_SetItem_Take2((PyDictObject *)dict, PyStackRef_AsPyObjectSteal(key), PyStackRef_AsPyObjectSteal(value)) != 0) goto pop_2_error; + int err = _PyDict_SetItem_Take2( + (PyDictObject *)dict, + PyStackRef_AsPyObjectSteal(key), + PyStackRef_AsPyObjectSteal(value) + ); + if (err != 0) goto pop_2_error; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); DISPATCH(); From 4b66b6b7d6e65f9eb2d61435b9b37ffeb7bb00fb Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 6 Aug 2024 19:45:53 +0300 Subject: [PATCH 21/23] gh-120104: IDLE: Fix padding in config and search dialogs (#120107) --- Lib/idlelib/configdialog.py | 2 +- Lib/idlelib/searchbase.py | 2 +- 
.../next/IDLE/2024-06-05-14-54-24.gh-issue-120104.j_thj4.rst | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/IDLE/2024-06-05-14-54-24.gh-issue-120104.j_thj4.rst diff --git a/Lib/idlelib/configdialog.py b/Lib/idlelib/configdialog.py index eedf97bf74fe6a..4d2adb48570d49 100644 --- a/Lib/idlelib/configdialog.py +++ b/Lib/idlelib/configdialog.py @@ -111,7 +111,7 @@ def create_widgets(self): load_configs: Load pages except for extensions. activate_config_changes: Tell editors to reload. """ - self.frame = frame = Frame(self, padding="5px") + self.frame = frame = Frame(self, padding=5) self.frame.grid(sticky="nwes") self.note = note = Notebook(frame) self.extpage = ExtPage(note) diff --git a/Lib/idlelib/searchbase.py b/Lib/idlelib/searchbase.py index 64ed50c7364be3..c68a6ca339af04 100644 --- a/Lib/idlelib/searchbase.py +++ b/Lib/idlelib/searchbase.py @@ -86,7 +86,7 @@ def create_widgets(self): top.wm_iconname(self.icon) _setup_dialog(top) self.top = top - self.frame = Frame(top, padding="5px") + self.frame = Frame(top, padding=5) self.frame.grid(sticky="nwes") top.grid_columnconfigure(0, weight=100) top.grid_rowconfigure(0, weight=100) diff --git a/Misc/NEWS.d/next/IDLE/2024-06-05-14-54-24.gh-issue-120104.j_thj4.rst b/Misc/NEWS.d/next/IDLE/2024-06-05-14-54-24.gh-issue-120104.j_thj4.rst new file mode 100644 index 00000000000000..10f5e345bf3e4f --- /dev/null +++ b/Misc/NEWS.d/next/IDLE/2024-06-05-14-54-24.gh-issue-120104.j_thj4.rst @@ -0,0 +1 @@ +Fix padding in config and search dialog windows in IDLE. From 58be1c270f2275603e56127791fa6777476954ec Mon Sep 17 00:00:00 2001 From: Nate Ohlson Date: Tue, 6 Aug 2024 12:26:37 -0500 Subject: [PATCH 22/23] gh-112301: Add macOS warning tracking tooling (#122211) Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> --- .github/workflows/reusable-macos.yml | 4 +- .github/workflows/reusable-ubuntu.yml | 2 +- ...-07-24-05-18-25.gh-issue-112301.lfINgZ.rst | 2 + Tools/build/.warningignore_macos | 3 + Tools/build/check_warnings.py | 161 ++++++++++++------ 5 files changed, 117 insertions(+), 55 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2024-07-24-05-18-25.gh-issue-112301.lfINgZ.rst create mode 100644 Tools/build/.warningignore_macos diff --git a/.github/workflows/reusable-macos.yml b/.github/workflows/reusable-macos.yml index 64ef2c91329d81..d77723ef27c2dc 100644 --- a/.github/workflows/reusable-macos.yml +++ b/.github/workflows/reusable-macos.yml @@ -48,8 +48,10 @@ jobs: --prefix=/opt/python-dev \ --with-openssl="$(brew --prefix openssl@3.0)" - name: Build CPython - run: make -j8 + run: set -o pipefail; make -j8 2>&1 | tee compiler_output.txt - name: Display build info run: make pythoninfo + - name: Check compiler warnings + run: python3 Tools/build/check_warnings.py --compiler-output-file-path=compiler_output.txt --warning-ignore-file-path=Tools/build/.warningignore_macos --compiler-output-type=clang - name: Tests run: make test diff --git a/.github/workflows/reusable-ubuntu.yml b/.github/workflows/reusable-ubuntu.yml index 8dd5f559585368..92069fddc31217 100644 --- a/.github/workflows/reusable-ubuntu.yml +++ b/.github/workflows/reusable-ubuntu.yml @@ -80,7 +80,7 @@ jobs: working-directory: ${{ env.CPYTHON_BUILDDIR }} run: make pythoninfo - name: Check compiler warnings - run: python Tools/build/check_warnings.py --compiler-output-file-path=${{ env.CPYTHON_BUILDDIR }}/compiler_output.txt --warning-ignore-file-path ${GITHUB_WORKSPACE}/Tools/build/.warningignore_ubuntu + run: 
python Tools/build/check_warnings.py --compiler-output-file-path=${{ env.CPYTHON_BUILDDIR }}/compiler_output.txt --warning-ignore-file-path ${GITHUB_WORKSPACE}/Tools/build/.warningignore_ubuntu --compiler-output-type=json
     - name: Remount sources writable for tests
       # some tests write to srcdir, lack of pyc files slows down testing
       run: sudo mount $CPYTHON_RO_SRCDIR -oremount,rw
diff --git a/Misc/NEWS.d/next/Security/2024-07-24-05-18-25.gh-issue-112301.lfINgZ.rst b/Misc/NEWS.d/next/Security/2024-07-24-05-18-25.gh-issue-112301.lfINgZ.rst
new file mode 100644
index 00000000000000..81237e735ebdb7
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2024-07-24-05-18-25.gh-issue-112301.lfINgZ.rst
@@ -0,0 +1,2 @@
+Add macOS warning tracking to warning check tooling.
+Patch by Nate Ohlson.
diff --git a/Tools/build/.warningignore_macos b/Tools/build/.warningignore_macos
new file mode 100644
index 00000000000000..1b504dfc54000f
--- /dev/null
+++ b/Tools/build/.warningignore_macos
@@ -0,0 +1,3 @@
+# Files listed will be ignored by the compiler warning checker
+# for the macOS/build and test job.
+# Keep lines sorted lexicographically to help avoid merge conflicts.
diff --git a/Tools/build/check_warnings.py b/Tools/build/check_warnings.py
index af9f7f169ad943..31258932dbd4ca 100644
--- a/Tools/build/check_warnings.py
+++ b/Tools/build/check_warnings.py
@@ -2,39 +2,87 @@
 Parses compiler output with -fdiagnostics-format=json and checks that
 warnings exist only in files that are expected to have warnings.
 """
+
 import argparse
+from collections import defaultdict
 import json
 import re
 import sys
 from pathlib import Path
 
 
-def extract_warnings_from_compiler_output(compiler_output: str) -> list[dict]:
+def extract_warnings_from_compiler_output_clang(
+    compiler_output: str,
+) -> list[dict]:
     """
-    Extracts warnings from the compiler output when using
-    -fdiagnostics-format=json
+    Extracts warnings from the compiler output when using clang
+    """
+    # Regex to find warnings in the compiler output
+    clang_warning_regex = re.compile(
+        r"(?P<file>.*):(?P<line>\d+):(?P<column>\d+): warning: (?P<message>.*)"
+    )
+    compiler_warnings = []
+    for line in compiler_output.splitlines():
+        if match := clang_warning_regex.match(line):
+            compiler_warnings.append(
+                {
+                    "file": match.group("file"),
+                    "line": match.group("line"),
+                    "column": match.group("column"),
+                    "message": match.group("message"),
+                }
+            )
 
-    Compiler output as a whole is not a valid json document, but includes many
-    json objects and may include other output that is not json.
+    return compiler_warnings
+
+
+def extract_warnings_from_compiler_output_json(
+    compiler_output: str,
+) -> list[dict]:
     """
+    Extracts warnings from the compiler output when using
+    -fdiagnostics-format=json.
+
+    Compiler output as a whole is not a valid json document,
+    but includes many json objects and may include other output
+    that is not json.
+    """
     # Regex to find json arrays at the top level of the file
     # in the compiler output
     json_arrays = re.findall(
        r"\[(?:[^[\]]|\[[^\]]*\])*\]", compiler_output
    )
     compiler_warnings = []
     for array in json_arrays:
         try:
             json_data = json.loads(array)
             json_objects_in_array = [entry for entry in json_data]
-            compiler_warnings.extend(
-                [
-                    entry
-                    for entry in json_objects_in_array
-                    if entry.get("kind") == "warning"
-                ]
-            )
+            warning_list = [
+                entry
+                for entry in json_objects_in_array
+                if entry.get("kind") == "warning"
+            ]
+            for warning in warning_list:
+                locations = warning["locations"]
+                for location in locations:
+                    for key in ["caret", "start", "end"]:
+                        if key in location:
+                            compiler_warnings.append(
+                                {
+                                    # Remove leading current directory if present
+                                    "file": location[key]["file"].removeprefix("./"),
+                                    "line": location[key]["line"],
+                                    "column": location[key]["column"],
+                                    "message": warning["message"],
+                                }
+                            )
+                            # Found a caret, start, or end in location so
+                            # break out completely to address next warning
+                            break
+                    else:
+                        continue
+                    break
+
         except json.JSONDecodeError:
             continue  # Skip malformed JSON
@@ -46,27 +94,16 @@ def get_warnings_by_file(warnings: list[dict]) -> dict[str, list[dict]]:
     Returns a dictionary where the key is the file and the data is the warnings
     in that file
     """
-    warnings_by_file = {}
+    warnings_by_file = defaultdict(list)
     for warning in warnings:
-        locations = warning["locations"]
-        for location in locations:
-            for key in ["caret", "start", "end"]:
-                if key in location:
-                    file = location[key]["file"]
-                    file = file.lstrip(
-                        "./"
-                    )  # Remove leading current directory if present
-                    if file not in warnings_by_file:
-                        warnings_by_file[file] = []
-                    warnings_by_file[file].append(warning)
+        warnings_by_file[warning["file"]].append(warning)
 
     return warnings_by_file
 
 
 def get_unexpected_warnings(
-    warnings: list[dict],
     files_with_expected_warnings: set[str],
-    files_with_warnings: set[str],
+    files_with_warnings: dict[str, list[dict]],
 ) -> int:
     """
     Returns failure status if warnings discovered in list of warnings
@@ -88,13 +125,12 @@ def get_unexpected_warnings(
 
 
 def get_unexpected_improvements(
-    warnings: list[dict],
     files_with_expected_warnings: set[str],
-    files_with_warnings: set[str],
+    files_with_warnings: dict[str, list[dict]],
 ) -> int:
     """
-    Returns failure status if there are no warnings in the list of warnings for
-    a file that is in the list of files with expected warnings
+    Returns failure status if there are no warnings in the list of warnings
+    for a file that is in the list of files with expected warnings
     """
     unexpected_improvements = []
     for file in files_with_expected_warnings:
@@ -123,7 +159,6 @@ def main(argv: list[str] | None = None) -> int:
         "-i",
         "--warning-ignore-file-path",
         type=str,
-        required=True,
         help="Path to the warning ignore file",
     )
     parser.add_argument(
@@ -141,6 +176,14 @@ def main(argv: list[str] | None = None) -> int:
         help="Flag to fail if files that were expected "
         "to have warnings have no warnings",
    )
+    parser.add_argument(
+        "-t",
+        "--compiler-output-type",
+        type=str,
+        required=True,
+        choices=["json", "clang"],
+        help="Type of compiler output file (json or clang)",
+    )
 
     args = parser.parse_args(argv)
 
@@ -149,44 +192,56 @@ def main(argv: list[str] | None = None) -> int:
     # Check that the compiler output file is a valid path
     if not Path(args.compiler_output_file_path).is_file():
         print(
-            "Compiler output file does not exist: "
-            f"{args.compiler_output_file_path}"
+            
f"Compiler output file does not exist:" + f" {args.compiler_output_file_path}" ) return 1 - # Check that the warning ignore file is a valid path - if not Path(args.warning_ignore_file_path).is_file(): + # Check that a warning ignore file was specified and if so is a valid path + if not args.warning_ignore_file_path: print( - "Warning ignore file does not exist: " - f"{args.warning_ignore_file_path}" + "Warning ignore file not specified." + " Continuing without it (no warnings ignored)." ) - return 1 + files_with_expected_warnings = set() + else: + if not Path(args.warning_ignore_file_path).is_file(): + print( + f"Warning ignore file does not exist:" + f" {args.warning_ignore_file_path}" + ) + return 1 + with Path(args.warning_ignore_file_path).open( + encoding="UTF-8" + ) as clean_files: + files_with_expected_warnings = { + file.strip() + for file in clean_files + if file.strip() and not file.startswith("#") + } with Path(args.compiler_output_file_path).open(encoding="UTF-8") as f: compiler_output_file_contents = f.read() - with Path(args.warning_ignore_file_path).open( - encoding="UTF-8" - ) as clean_files: - files_with_expected_warnings = { - file.strip() - for file in clean_files - if file.strip() and not file.startswith("#") - } - - warnings = extract_warnings_from_compiler_output( - compiler_output_file_contents - ) + if args.compiler_output_type == "json": + warnings = extract_warnings_from_compiler_output_json( + compiler_output_file_contents + ) + elif args.compiler_output_type == "clang": + warnings = extract_warnings_from_compiler_output_clang( + compiler_output_file_contents + ) + files_with_warnings = get_warnings_by_file(warnings) status = get_unexpected_warnings( - warnings, files_with_expected_warnings, files_with_warnings + files_with_expected_warnings, files_with_warnings ) if args.fail_on_regression: exit_code |= status status = get_unexpected_improvements( - warnings, files_with_expected_warnings, files_with_warnings + files_with_expected_warnings, files_with_warnings ) if args.fail_on_improvement: exit_code |= status From c4e8196940bdf2eeb45692fb54741c507766bc74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=92=D0=B8=D1=82=D0=B0=D0=BB=D0=B8=D0=B9=20=D0=94=D0=BC?= =?UTF-8?q?=D0=B8=D1=82=D1=80=D0=B8=D0=B5=D0=B2?= Date: Tue, 6 Aug 2024 20:38:33 +0300 Subject: [PATCH 23/23] Fix duplicated words 'begins with a' in pathlib docstring (#122732) --- Lib/pathlib/_abc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index c32e7762cefea3..ee903177aa10d4 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -63,7 +63,7 @@ def splitdrive(self, path): def splitext(self, path): """Split the path into a pair (root, ext), where *ext* is empty or - begins with a begins with a period and contains at most one period, + begins with a period and contains at most one period, and *root* is everything before the extension.""" raise UnsupportedOperation(self._unsupported_msg('splitext()'))
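
For readers following the check_warnings.py changes in PATCH 22, here is a
minimal runnable sketch of the two extraction paths. It is not part of any
patch above: the sample compiler output, file name, line numbers, and message
are invented for illustration. Only the clang warning regex, the JSON-array
regex, and the `kind == "warning"` filter are taken from the patched file.

    # Illustration only -- the sample inputs below are invented.
    import json
    import re

    # Clang path: one warning per output line, parsed with the regex
    # from Tools/build/check_warnings.py.
    clang_warning_regex = re.compile(
        r"(?P<file>.*):(?P<line>\d+):(?P<column>\d+): warning: (?P<message>.*)"
    )
    sample_clang_output = (
        "Modules/example.c:12:5: warning: unused variable 'x' [-Wunused-variable]\n"
        "ld: note: this line does not match the warning pattern\n"
    )
    clang_warnings = [
        m.groupdict()
        for line in sample_clang_output.splitlines()
        if (m := clang_warning_regex.match(line))
    ]
    print(clang_warnings)
    # [{'file': 'Modules/example.c', 'line': '12', 'column': '5',
    #   'message': "unused variable 'x' [-Wunused-variable]"}]

    # GCC path: -fdiagnostics-format=json emits JSON arrays mixed with other
    # text; top-level arrays are located with a regex, then entries whose
    # "kind" is "warning" are kept.
    sample_gcc_output = (
        'building...\n'
        '[{"kind": "warning", "message": "unused variable",'
        ' "locations": [{"caret":'
        ' {"file": "./Modules/example.c", "line": 12, "column": 5}}]}]\n'
        'done.\n'
    )
    for array in re.findall(r"\[(?:[^[\]]|\[[^\]]*\])*\]", sample_gcc_output):
        for entry in json.loads(array):
            if entry.get("kind") == "warning":
                caret = entry["locations"][0]["caret"]
                # Drop a leading "./" so paths match the ignore-file entries
                print(caret["file"].removeprefix("./"),
                      caret["line"], entry["message"])
    # Modules/example.c 12 unused variable

Both paths normalize each warning to the same file/line/column/message shape,
which is what lets get_warnings_by_file() and the regression/improvement
checks stay agnostic about which compiler produced the output.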