From 34c2b40a01fa9d91e667e88c322191c03e8ab7c0 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Tue, 24 Sep 2024 16:17:49 -0600 Subject: [PATCH 01/10] add first draft of free-threading page for the guide --- guide/src/SUMMARY.md | 1 + guide/src/free-threading.md | 127 ++++++++++++++++++++++++++++++++++++ guide/src/migration.md | 72 ++------------------ 3 files changed, 134 insertions(+), 66 deletions(-) create mode 100644 guide/src/free-threading.md diff --git a/guide/src/SUMMARY.md b/guide/src/SUMMARY.md index af43897c014..1d0cdece6a0 100644 --- a/guide/src/SUMMARY.md +++ b/guide/src/SUMMARY.md @@ -25,6 +25,7 @@ - [Conversion traits](conversions/traits.md) - [Using `async` and `await`](async-await.md) - [Parallelism](parallelism.md) +- [Supporting Free-Threaded Python](free-threaded.md) - [Debugging](debugging.md) - [Features reference](features.md) - [Performance](performance.md) diff --git a/guide/src/free-threading.md b/guide/src/free-threading.md new file mode 100644 index 00000000000..dba7b8dfa7c --- /dev/null +++ b/guide/src/free-threading.md @@ -0,0 +1,127 @@ +# Supporting Free-Threaded CPython + +CPython 3.13 introduces an experimental build of CPython that does not rely on +the global interpreter lock for thread safety. As of version 0.23, PyO3 also has +preliminary support for building rust extensions for the free-threaded Python +build and support for calling into free-threaded Python from Rust. + +The main benefit for supporting free-threaded Python is that it is no longer +necessary to rely on rust parallelism to achieve concurrent speedups using +PyO3. Instead, you can parallelise in Python using the +[`threading`](https://docs.python.org/3/library/threading.html) module, and +still expect to see see multicore speedups by exploiting threaded concurrency in +Python, without any need to release the GIL. 
If you have ever needed to use +`multiprocessing` to achieve a speedup for some algorithm written in Python, +free-threading will likely allow the use of Python threads instead for the same +workflow. + +If you want more background on free-threaded Python in general, see the [what's +new](https://docs.python.org/3.13/whatsnew/3.13.html#whatsnew313-free-threaded-cpython) +entry in the CPython docs, the [HOWTO +guide](https://docs.python.org/3.13/howto/free-threading-extensions.html#freethreading-extensions-howto) +for porting C extensions, and [PEP 703](https://peps.python.org/pep-0703/), +which provides the technical background for the free-threading implementation in +CPython. + +This document provides advice for porting rust code using PyO3 to run under +free-threaded Python. While many simple PyO3 uses, like defining an immutable +python class, will likely work "out of the box", there are currently some +limitations. + +## Many symbols exposed by PyO3 have `GIL` in the name + +We are aware that there are some naming issues in the PyO3 API that make it +awkward to work in an environment where there is no GIL. We plan to change the +names of these types to deemphasize the role of the GIL in future versions of +PyO3, but for now you should remember that the use of the term `GIL` in +functions and types like `with_gil` and `GILOnceCell` is historical. + +Instead, you can think about whether or not you a rust scope has access to a +Python **thread state** in `ATTACHED` status. See [PEP +703](https://peps.python.org/pep-0703/#thread-states) for more background about +Python thread states and status. In order to use the CPython C API in both the +GIL-enabled and free-threaded builds of CPython, you must own an attached +Python thread state. The `with_gil` function sets this up and releases the +thread state after the closure passed to `with_gil` finishes. 
Similarly, in both +the GIL-enabled and free-threaded build, you must use `allow_threads` in +order to use rust threads. Both of `with_gil` and `allow_threads` tell CPython +to put the Python thread state into `DETACHED` status. In the GIL-enabled build, +this is equivalent to releasing the GIL. In the free-threaded build, this unblocks +CPython from triggering a stop-the-world for a garbage collection pass. + +## Runtime panics for multithreaded access of mutable `pyclass` instances + +If you wrote code that makes strong assumptions about the GIL protecting shared +mutable state, it may not currently be straightforward to support free-threaded +Python without the risk of runtime mutable borrow panics. PyO3 does not lock +access to python state, so if more than one thread tries to access a python +object that has already been mutably borrowed, only runtime checking enforces +safety around mutably aliased data owned by the Python interpreter. + +It was always possible to generate panics like this in PyO3 in code that +releases the GIL with `allow_threads`, but now in free-threaded python it's much +easier to trigger these panics because there is no GIL. + +We will allow user-selectable semantics for for mutable pyclass definitions in +PyO3 0.24, allowing some form of opt-in locking to emulate the GIL if +that is needed. + +## `GILProtected` is not exposed + +`GILProtected` is a PyO3 type that allows mutable access to static data by +leveraging the GIL to lock concurrent access from other threads. In +free-threaded python there is no GIL, so you will need to replace this type with +some other form of locking. In many cases, `std::sync::Atomic` or +`std::sync::Mutex` will be sufficient. If the locks do not guard the execution +of arbitrary Python code or use of the CPython C API then conditional +compilation is likely unnecessary since `GILProtected` was not needed in the +first place. 
+ +Before: + +```rust +# fn main() { +# #[cfg(not(Py_GIL_DISABLED))] { +# use pyo3::prelude::*; +use pyo3::sync::GILProtected; +use pyo3::types::{PyDict, PyNone}; +use std::cell::RefCell; + +static OBJECTS: GILProtected<RefCell<Vec<Py<PyDict>>>> = + GILProtected::new(RefCell::new(Vec::new())); + +Python::with_gil(|py| { + // stand-in for something that executes arbitrary python code + let d = PyDict::new(py); + d.set_item(PyNone::get(py), PyNone::get(py)).unwrap(); + OBJECTS.get(py).borrow_mut().push(d.unbind()); +}); +# }} +``` + +After: + +```rust +# use pyo3::prelude::*; +# fn main() { +use pyo3::types::{PyDict, PyNone}; +use std::sync::Mutex; + +static OBJECTS: Mutex<Vec<Py<PyDict>>> = Mutex::new(Vec::new()); + +Python::with_gil(|py| { + // stand-in for something that executes arbitrary python code + let d = PyDict::new(py); + d.set_item(PyNone::get(py), PyNone::get(py)).unwrap(); + // we're not executing python code while holding the lock, so GILProtected + // was never needed + OBJECTS.lock().unwrap().push(d.unbind()); +}); +# } +``` + +If you are executing arbitrary Python code while holding the lock, then you will +need to use conditional compilation to use `GILProtected` on GIL-enabled python +builds and mutexes otherwise. Python 3.13 introduces `PyMutex`, which releases +the GIL while the lock is held, so that is another option if you only need to +support newer Python versions. diff --git a/guide/src/migration.md b/guide/src/migration.md index be6d0748b55..ac20408a522 100644 --- a/guide/src/migration.md +++ b/guide/src/migration.md @@ -208,80 +208,20 @@ impl<'a, 'py> IntoPyObject<'py> for &'a MyPyObjectWrapper { ### Free-threaded Python Support -
-Click to expand PyO3 0.23 introduces preliminary support for the new free-threaded build of -CPython 3.13. PyO3 features that implicitly assumed the existence of the GIL -are not exposed in the free-threaded build, since they are no longer safe. -Other features, such as `GILOnceCell`, have been internally rewritten to be -threadsafe without the GIL. +CPython 3.13. PyO3 features that implicitly assumed the existence of the GIL are +not exposed in the free-threaded build, since they are no longer safe. Other +features, such as `GILOnceCell`, have been internally rewritten to be threadsafe +without the GIL. If you make use of these features then you will need to account for the unavailability of this API in the free-threaded build. One way to handle it is via conditional compilation -- extensions built for the free-threaded build will have the `Py_GIL_DISABLED` attribute defined. -### `GILProtected` - -`GILProtected` allows mutable access to static data by leveraging the GIL to -lock concurrent access from other threads. In free-threaded python there is no -GIL, so you will need to replace this type with some other form of locking. In -many cases, `std::sync::Atomic` or `std::sync::Mutex` will be sufficient. If the -locks do not guard the execution of arbitrary Python code or use of the CPython -C API then conditional compilation is likely unnecessary since `GILProtected` -was not needed in the first place. 
- -Before: - -```rust -# fn main() { -# #[cfg(not(Py_GIL_DISABLED))] { -# use pyo3::prelude::*; -use pyo3::sync::GILProtected; -use pyo3::types::{PyDict, PyNone}; -use std::cell::RefCell; - -static OBJECTS: GILProtected<RefCell<Vec<Py<PyDict>>>> = - GILProtected::new(RefCell::new(Vec::new())); - -Python::with_gil(|py| { - // stand-in for something that executes arbitrary python code - let d = PyDict::new(py); - d.set_item(PyNone::get(py), PyNone::get(py)).unwrap(); - OBJECTS.get(py).borrow_mut().push(d.unbind()); -}); -# }} -``` - -After: - -```rust -# use pyo3::prelude::*; -# fn main() { -use pyo3::types::{PyDict, PyNone}; -use std::sync::Mutex; - -static OBJECTS: Mutex<Vec<Py<PyDict>>> = Mutex::new(Vec::new()); - -Python::with_gil(|py| { - // stand-in for something that executes arbitrary python code - let d = PyDict::new(py); - d.set_item(PyNone::get(py), PyNone::get(py)).unwrap(); - // we're not executing python code while holding the lock, so GILProtected - // was never needed - OBJECTS.lock().unwrap().push(d.unbind()); -}); -# } -``` - -If you are executing arbitrary Python code while holding the lock, then you will -need to use conditional compilation to use `GILProtected` on GIL-enabled python -builds and mutexes otherwise. Python 3.13 introduces `PyMutex`, which releases -the GIL while the lock is held, so that is another option if you only need to -support newer Python versions. - -
+See [the guide section on free-threaded Python](free-threading.md) for more +details about supporting free-threaded Python in your PyO3 extensions. ## from 0.21.* to 0.22 From 539ef9a6490e108e3522660a88b7c9e30113fb16 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Tue, 1 Oct 2024 14:53:01 -0600 Subject: [PATCH 02/10] respond to review comments --- guide/src/free-threading.md | 89 ++++++++++++++++++++++++------------- 1 file changed, 58 insertions(+), 31 deletions(-) diff --git a/guide/src/free-threading.md b/guide/src/free-threading.md index dba7b8dfa7c..39f0373d67f 100644 --- a/guide/src/free-threading.md +++ b/guide/src/free-threading.md @@ -1,20 +1,27 @@ # Supporting Free-Threaded CPython CPython 3.13 introduces an experimental build of CPython that does not rely on -the global interpreter lock for thread safety. As of version 0.23, PyO3 also has -preliminary support for building rust extensions for the free-threaded Python -build and support for calling into free-threaded Python from Rust. +the global interpreter lock (often referred to as the GIL) for thread safety. As +of version 0.23, PyO3 also has preliminary support for building rust extensions +for the free-threaded Python build and support for calling into free-threaded +Python from Rust. The main benefit for supporting free-threaded Python is that it is no longer necessary to rely on rust parallelism to achieve concurrent speedups using PyO3. Instead, you can parallelise in Python using the [`threading`](https://docs.python.org/3/library/threading.html) module, and -still expect to see see multicore speedups by exploiting threaded concurrency in +still expect to see multicore speedups by exploiting threaded concurrency in Python, without any need to release the GIL. If you have ever needed to use `multiprocessing` to achieve a speedup for some algorithm written in Python, free-threading will likely allow the use of Python threads instead for the same workflow. 
+PyO3's support for free-threaded Python will enable authoring native Python +extensions that are thread-safe by construction, with much stronger safety +guarantees than C extensions. Our goal is to enable ["fearless +concurrency"](https://doc.rust-lang.org/book/ch16-00-concurrency.html) in the +native Python runtime by building on the rust `Send` and `Sync` traits. + If you want more background on free-threaded Python in general, see the [what's new](https://docs.python.org/3.13/whatsnew/3.13.html#whatsnew313-free-threaded-cpython) entry in the CPython docs, the [HOWTO @@ -25,44 +32,64 @@ CPython. This document provides advice for porting rust code using PyO3 to run under free-threaded Python. While many simple PyO3 uses, like defining an immutable -python class, will likely work "out of the box", there are currently some -limitations. +Python class, will likely work "out of the box", there are currently some +limitations. ## Many symbols exposed by PyO3 have `GIL` in the name We are aware that there are some naming issues in the PyO3 API that make it -awkward to work in an environment where there is no GIL. We plan to change the -names of these types to deemphasize the role of the GIL in future versions of -PyO3, but for now you should remember that the use of the term `GIL` in -functions and types like `with_gil` and `GILOnceCell` is historical. +awkward to think about a runtime environment where there is no GIL. We plan to +change the names of these types to de-emphasize the role of the GIL in future +versions of PyO3, but for now you should remember that the use of the term `GIL` +in functions and types like `with_gil` and `GILOnceCell` is historical. -Instead, you can think about whether or not you a rust scope has access to a -Python **thread state** in `ATTACHED` status. See [PEP +Instead, you can think about whether or not a rust thread is attached to a +Python **thread state**. 
See [PEP 703](https://peps.python.org/pep-0703/#thread-states) for more background about -Python thread states and status. In order to use the CPython C API in both the -GIL-enabled and free-threaded builds of CPython, you must own an attached -Python thread state. The `with_gil` function sets this up and releases the -thread state after the closure passed to `with_gil` finishes. Similarly, in both -the GIL-enabled and free-threaded build, you must use `allow_threads` in -order to use rust threads. Both of `with_gil` and `allow_threads` tell CPython -to put the Python thread state into `DETACHED` status. In the GIL-enabled build, -this is equivalent to releasing the GIL. In the free-threaded build, this unblocks -CPython from triggering a stop-the-world for a garbage collection pass. +Python thread states and status. + +In order to use the CPython C API in both the GIL-enabled and free-threaded +builds of CPython, the thread calling into the C API must own an attached Python +thread state. In the GIL-enabled build the thread that holds the GIL by +definition is attached to a valid Python thread state, and therefore only one +thread at a time can call into the C API. + +What a thread releases the GIL, the Python thread state owned by that thread is +detached from the interpreter runtime, and it is not valid to call into the +CPython C API. + +In the free-threaded build, more than one thread can simultaneously call into +the C API, but any thread that does so must still have a reference to a valid +attached thread state. The CPython runtime also assumes it is responsible for +creating and destroying threads, so it is necessary to detach from the runtime +before creating any native threads outside of the CPython runtime. In the +GIL-enabled build, this corresponds to dropping the GIL with an `allow_threads` +call. + +In the GIL-enabled build, releasing the GIL allows other threads to +proceed. 
This is no longer necessary in the free-threaded build, but you should +still release the GIL when doing long-running tasks that do not require the +CPython runtime, since releasing the GIL unblocks running the Python garbage +collector and freeing unused memory. ## Runtime panics for multithreaded access of mutable `pyclass` instances If you wrote code that makes strong assumptions about the GIL protecting shared mutable state, it may not currently be straightforward to support free-threaded Python without the risk of runtime mutable borrow panics. PyO3 does not lock -access to python state, so if more than one thread tries to access a python +access to Python state, so if more than one thread tries to access a Python object that has already been mutably borrowed, only runtime checking enforces -safety around mutably aliased data owned by the Python interpreter. +safety around mutably aliased data owned by the Python interpreter. We believe +that it would require adding an `unsafe impl` for `Send` or `Sync` to trigger +this behavior. Please report any issues related to runtime borrow checker errors +on mutable pyclass implementations that do not make strong assumptions about the +GIL. It was always possible to generate panics like this in PyO3 in code that -releases the GIL with `allow_threads`, but now in free-threaded python it's much -easier to trigger these panics because there is no GIL. +releases the GIL with `allow_threads`, but now in free-threaded Python there are +more opportunities to trigger these panics because there is no GIL. -We will allow user-selectable semantics for for mutable pyclass definitions in +We plan to allow user-selectable semantics for for mutable pyclass definitions in PyO3 0.24, allowing some form of opt-in locking to emulate the GIL if that is needed. @@ -70,7 +97,7 @@ that is needed. `GILProtected` is a PyO3 type that allows mutable access to static data by leveraging the GIL to lock concurrent access from other threads. 
In -free-threaded python there is no GIL, so you will need to replace this type with +free-threaded Python there is no GIL, so you will need to replace this type with some other form of locking. In many cases, `std::sync::Atomic` or `std::sync::Mutex` will be sufficient. If the locks do not guard the execution of arbitrary Python code or use of the CPython C API then conditional @@ -91,7 +118,7 @@ static OBJECTS: GILProtected<RefCell<Vec<Py<PyDict>>>> = GILProtected::new(RefCell::new(Vec::new())); Python::with_gil(|py| { - // stand-in for something that executes arbitrary python code + // stand-in for something that executes arbitrary Python code let d = PyDict::new(py); d.set_item(PyNone::get(py), PyNone::get(py)).unwrap(); OBJECTS.get(py).borrow_mut().push(d.unbind()); @@ -110,10 +137,10 @@ use std::sync::Mutex; static OBJECTS: Mutex<Vec<Py<PyDict>>> = Mutex::new(Vec::new()); Python::with_gil(|py| { - // stand-in for something that executes arbitrary python code + // stand-in for something that executes arbitrary Python code let d = PyDict::new(py); d.set_item(PyNone::get(py), PyNone::get(py)).unwrap(); - // we're not executing python code while holding the lock, so GILProtected + // we're not executing Python code while holding the lock, so GILProtected // was never needed OBJECTS.lock().unwrap().push(d.unbind()); }); @@ -121,7 +148,7 @@ Python::with_gil(|py| { ``` If you are executing arbitrary Python code while holding the lock, then you will -need to use conditional compilation to use `GILProtected` on GIL-enabled python +need to use conditional compilation to use `GILProtected` on GIL-enabled Python builds and mutexes otherwise. Python 3.13 introduces `PyMutex`, which releases the GIL while the lock is held, so that is another option if you only need to support newer Python versions.
From ffd5267bf7d6765d98fc680c64d6ccf2faa97878 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Tue, 1 Oct 2024 14:58:44 -0600 Subject: [PATCH 03/10] link to interior mutability docs --- guide/src/free-threading.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/guide/src/free-threading.md b/guide/src/free-threading.md index 39f0373d67f..d2b7c34e398 100644 --- a/guide/src/free-threading.md +++ b/guide/src/free-threading.md @@ -86,8 +86,10 @@ on mutable pyclass implementations that do not make strong assumptions about the GIL. It was always possible to generate panics like this in PyO3 in code that -releases the GIL with `allow_threads`, but now in free-threaded Python there are -more opportunities to trigger these panics because there is no GIL. +releases the GIL with `allow_threads` (see [the docs on interior +mutability](./class.md#bound-and-interior-mutability),) but now in free-threaded +Python there are more opportunities to trigger these panics because there is no +GIL. We plan to allow user-selectable semantics for for mutable pyclass definitions in PyO3 0.24, allowing some form of opt-in locking to emulate the GIL if From d0af6db2218c5262a0aa2fe2a55e06f24e069647 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Mon, 14 Oct 2024 12:30:49 -0600 Subject: [PATCH 04/10] respond to review comments --- guide/src/free-threading.md | 117 ++++++++++++++++++------------------ 1 file changed, 57 insertions(+), 60 deletions(-) diff --git a/guide/src/free-threading.md b/guide/src/free-threading.md index d2b7c34e398..d6cc597de74 100644 --- a/guide/src/free-threading.md +++ b/guide/src/free-threading.md @@ -1,26 +1,11 @@ # Supporting Free-Threaded CPython -CPython 3.13 introduces an experimental build of CPython that does not rely on -the global interpreter lock (often referred to as the GIL) for thread safety. 
As -of version 0.23, PyO3 also has preliminary support for building rust extensions -for the free-threaded Python build and support for calling into free-threaded -Python from Rust. - -The main benefit for supporting free-threaded Python is that it is no longer -necessary to rely on rust parallelism to achieve concurrent speedups using -PyO3. Instead, you can parallelise in Python using the -[`threading`](https://docs.python.org/3/library/threading.html) module, and -still expect to see multicore speedups by exploiting threaded concurrency in -Python, without any need to release the GIL. If you have ever needed to use -`multiprocessing` to achieve a speedup for some algorithm written in Python, -free-threading will likely allow the use of Python threads instead for the same -workflow. - -PyO3's support for free-threaded Python will enable authoring native Python -extensions that are thread-safe by construction, with much stronger safety -guarantees than C extensions. Our goal is to enable ["fearless -concurrency"](https://doc.rust-lang.org/book/ch16-00-concurrency.html) in the -native Python runtime by building on the rust `Send` and `Sync` traits. +CPython 3.13 introduces an experimental "free-threaded" build of CPython that +does not rely on the [global interpreter +lock](https://docs.python.org/3/glossary.html#term-global-interpreter-lock) +(often referred to as the GIL) for thread safety. As of version 0.23, PyO3 also +has preliminary support for building rust extensions for the free-threaded +Python build and support for calling into free-threaded Python from Rust. If you want more background on free-threaded Python in general, see the [what's new](https://docs.python.org/3.13/whatsnew/3.13.html#whatsnew313-free-threaded-cpython) @@ -30,6 +15,23 @@ for porting C extensions, and [PEP 703](https://peps.python.org/pep-0703/), which provides the technical background for the free-threading implementation in CPython. 
+The main benefit for supporting free-threaded Python is that both pure Python +code and code interacting with Python via the CPython C API can run +simultaneously on multiple OS threads. This means it's much more +straightforward to achieve multithreaded parallelism in the Python layer, using +the `threading` module, without any need to ensure the GIL is released during +compute-heavy tasks, or any worries about [Amdahl's +law](https://en.wikipedia.org/wiki/Amdahl%27s_law) limiting parallel +performance. If you have ever needed to use `multiprocessing` to achieve a +speedup for some Python code, free-threading will likely allow the use of Python +threads instead for the same workflow. + +PyO3's support for free-threaded Python will enable authoring native Python +extensions that are thread-safe by construction, with much stronger safety +guarantees than C extensions. Our goal is to enable ["fearless +concurrency"](https://doc.rust-lang.org/book/ch16-00-concurrency.html) in the +native Python runtime by building on the rust `Send` and `Sync` traits. + This document provides advice for porting rust code using PyO3 to run under free-threaded Python. While many simple PyO3 uses, like defining an immutable Python class, will likely work "out of the box", there are currently some @@ -44,33 +46,28 @@ versions of PyO3, but for now you should remember that the use of the term `GIL` in functions and types like `with_gil` and `GILOnceCell` is historical. Instead, you can think about whether or not a rust thread is attached to a -Python **thread state**. See [PEP +Python interpreter runtime. See [PEP 703](https://peps.python.org/pep-0703/#thread-states) for more background about -Python thread states and status. - -In order to use the CPython C API in both the GIL-enabled and free-threaded -builds of CPython, the thread calling into the C API must own an attached Python -thread state. 
In the GIL-enabled build the thread that holds the GIL by -definition is attached to a valid Python thread state, and therefore only one -thread at a time can call into the C API. - -What a thread releases the GIL, the Python thread state owned by that thread is -detached from the interpreter runtime, and it is not valid to call into the -CPython C API. - -In the free-threaded build, more than one thread can simultaneously call into -the C API, but any thread that does so must still have a reference to a valid -attached thread state. The CPython runtime also assumes it is responsible for -creating and destroying threads, so it is necessary to detach from the runtime -before creating any native threads outside of the CPython runtime. In the -GIL-enabled build, this corresponds to dropping the GIL with an `allow_threads` -call. +how threads can be attached and detached from the interpreter runtime, in a +manner analogous to releasing and acquiring the GIL in the GIL-enabled build. + +Calling into the CPython C API is only legal when an OS thread is explicitly +attached to the interpreter runtime. In the GIL-enabled build, this happens when +the GIL is acquired. In the free-threaded build there is no GIL, but the same C +macros that release or acquire the GIL in the GIL-enabled build instead ask the +interpreter to attach the thread to the Python runtime, and there can be many +threads simultaneously attached. + +The main reason for attaching to the Python runtime is to interact with Python +objects or call into the CPython C API. To interact with the Python runtime, the +thread must register itself by attaching to the interpreter runtime. In the GIL-enabled build, releasing the GIL allows other threads to proceed.
This is no longer necessary in the free-threaded build, but you should -still release the GIL when doing long-running tasks that do not require the -CPython runtime, since releasing the GIL unblocks running the Python garbage -collector and freeing unused memory. +still detach from the interpreter runtime using [`Python::allow_threads`] when +doing long-running tasks that do not require the CPython runtime, since +detaching from the runtime allows the Python garbage collector to run, freeing +unused memory. ## Runtime panics for multithreaded access of mutable `pyclass` instances @@ -79,11 +76,11 @@ mutable state, it may not currently be straightforward to support free-threaded Python without the risk of runtime mutable borrow panics. PyO3 does not lock access to Python state, so if more than one thread tries to access a Python object that has already been mutably borrowed, only runtime checking enforces -safety around mutably aliased data owned by the Python interpreter. We believe -that it would require adding an `unsafe impl` for `Send` or `Sync` to trigger -this behavior. Please report any issues related to runtime borrow checker errors -on mutable pyclass implementations that do not make strong assumptions about the -GIL. +safety around mutably aliased rust variables the Python interpreter can +access. We believe that it would require adding an `unsafe impl` for `Send` or +`Sync` to trigger this behavior in code using PyO3. Please report any issues +related to runtime borrow checker errors on mutable pyclass implementations that +do not make strong assumptions about the GIL. It was always possible to generate panics like this in PyO3 in code that releases the GIL with `allow_threads` (see [the docs on interior @@ -91,20 +88,17 @@ mutability](./class.md#bound-and-interior-mutability),) but now in free-threaded Python there are more opportunities to trigger these panics because there is no GIL. 
-We plan to allow user-selectable semantics for for mutable pyclass definitions in -PyO3 0.24, allowing some form of opt-in locking to emulate the GIL if -that is needed. +We plan to allow user-selectable semantics for mutable pyclass definitions in +PyO3 0.24, allowing some form of opt-in locking to emulate the GIL if that is +needed. ## `GILProtected` is not exposed `GILProtected` is a PyO3 type that allows mutable access to static data by leveraging the GIL to lock concurrent access from other threads. In free-threaded Python there is no GIL, so you will need to replace this type with -some other form of locking. In many cases, `std::sync::Atomic` or -`std::sync::Mutex` will be sufficient. If the locks do not guard the execution -of arbitrary Python code or use of the CPython C API then conditional -compilation is likely unnecessary since `GILProtected` was not needed in the -first place. +some other form of locking. In many cases, a type from `std::sync::atomic` or +a `std::sync::Mutex` will be sufficient. Before: @@ -151,6 +145,9 @@ Python::with_gil(|py| { If you are executing arbitrary Python code while holding the lock, then you will need to use conditional compilation to use `GILProtected` on GIL-enabled Python -builds and mutexes otherwise. Python 3.13 introduces `PyMutex`, which releases -the GIL while the lock is held, so that is another option if you only need to -support newer Python versions. +builds and mutexes otherwise. If your use of `GILProtected` does not guard the +execution of arbitrary Python code or use of the CPython C API, then conditional +compilation is likely unnecessary since `GILProtected` was not needed in the +first place and instead rust mutexes or atomics should be preferred. Python 3.13 +introduces `PyMutex`, which releases the GIL while the lock is held, so that is +another option if you only need to support newer Python versions.
From 5cfc001e8c7c9057019241e6dbc76efba2a310a3 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Mon, 14 Oct 2024 12:46:43 -0600 Subject: [PATCH 05/10] add changelog --- newsfragments/4577.added.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/4577.added.md diff --git a/newsfragments/4577.added.md b/newsfragments/4577.added.md new file mode 100644 index 00000000000..71858564fe5 --- /dev/null +++ b/newsfragments/4577.added.md @@ -0,0 +1 @@ +* Added a guide page for free-threaded Python. From 349c6215c7486b8b1252aa03f3939d57ea491fbb Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Mon, 14 Oct 2024 12:52:47 -0600 Subject: [PATCH 06/10] fix TOC link --- guide/src/SUMMARY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guide/src/SUMMARY.md b/guide/src/SUMMARY.md index 1d0cdece6a0..f025d790b5d 100644 --- a/guide/src/SUMMARY.md +++ b/guide/src/SUMMARY.md @@ -25,7 +25,7 @@ - [Conversion traits](conversions/traits.md) - [Using `async` and `await`](async-await.md) - [Parallelism](parallelism.md) -- [Supporting Free-Threaded Python](free-threaded.md) +- [Supporting Free-Threaded Python](free-threading.md) - [Debugging](debugging.md) - [Features reference](features.md) - [Performance](performance.md) From 927b0df4c8e05e4431c00a6da97d3f10f60543a5 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Fri, 18 Oct 2024 11:25:39 -0600 Subject: [PATCH 07/10] apply code review suggestions --- guide/src/free-threading.md | 48 +++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/guide/src/free-threading.md b/guide/src/free-threading.md index d6cc597de74..0f172ec7787 100644 --- a/guide/src/free-threading.md +++ b/guide/src/free-threading.md @@ -4,7 +4,7 @@ CPython 3.13 introduces an experimental "free-threaded" build of CPython that does not rely on the [global interpreter lock](https://docs.python.org/3/glossary.html#term-global-interpreter-lock) (often referred to as the GIL) for thread 
safety. As of version 0.23, PyO3 also -has preliminary support for building rust extensions for the free-threaded +has preliminary support for building Rust extensions for the free-threaded Python build and support for calling into free-threaded Python from Rust. If you want more background on free-threaded Python in general, see the [what's @@ -30,9 +30,9 @@ PyO3's support for free-threaded Python will enable authoring native Python extensions that are thread-safe by construction, with much stronger safety guarantees than C extensions. Our goal is to enable ["fearless concurrency"](https://doc.rust-lang.org/book/ch16-00-concurrency.html) in the -native Python runtime by building on the rust `Send` and `Sync` traits. +native Python runtime by building on the Rust `Send` and `Sync` traits. -This document provides advice for porting rust code using PyO3 to run under +This document provides advice for porting Rust code using PyO3 to run under free-threaded Python. While many simple PyO3 uses, like defining an immutable Python class, will likely work "out of the box", there are currently some limitations. @@ -45,7 +45,7 @@ change the names of these types to de-emphasize the role of the GIL in future versions of PyO3, but for now you should remember that the use of the term `GIL` in functions and types like `with_gil` and `GILOnceCell` is historical. -Instead, you can think about whether or not a rust thread is attached to a +Instead, you can think about whether or not a Rust thread is attached to a Python interpreter runtime. See [PEP 703](https://peps.python.org/pep-0703/#thread-states) for more background about how threads can be attached and detached from the interpreter runtime, in a @@ -60,14 +60,26 @@ threads simultaneously attached. The main reason for attaching to the Python runtime is to interact with Python objects or call into the CPython C API. To interact with the Python runtime, the -thread must register itself by attaching to the interpreter runtime. 
- -In the GIL-enabled build, releasing the GIL allows other threads to -proceed. This is no longer necessary in the free-threaded build, but you should -still detach from the interpreter runtime using [`Python::allow_threads`] when -doing long-running tasks that do not require the CPython runtime, since -detaching from the runtime allows the Python garbage collector to run, freeing -unused memory. +thread must register itself by attaching to the interpreter runtime. If you are +not yet attached to the Python runtime, you can register the thread using the +[`Python::with_gil`] function. Threads created via the Python `threading` module +do not need to do this, but all other OS threads that interact with the +Python runtime must explicitly attach using `with_gil` and obtain a `'py` +lifetime. + +In the GIL-enabled build, PyO3 uses the `Python<'py>` type and the `'py` lifetime +to signify that the global interpreter lock is held. In the free-threaded build, +holding a `'py` lifetime means the thread is currently attached to the Python +interpreter but other threads might be simultaneously interacting with the +Python runtime. + +Since there is no GIL in the free-threaded build, releasing the GIL for +long-running tasks is no longer necessary to ensure other threads run, but you +should still detach from the interpreter runtime using [`Python::allow_threads`] +when doing long-running tasks that do not require the CPython runtime. The +garbage collector can only run if all threads are detached from the runtime (in +a stop-the-world state), so detaching from the runtime allows freeing unused +memory. ## Runtime panics for multithreaded access of mutable `pyclass` instances @@ -76,7 +88,7 @@ mutable state, it may not currently be straightforward to support free-threaded Python without the risk of runtime mutable borrow panics. 
PyO3 does not lock access to Python state, so if more than one thread tries to access a Python object that has already been mutably borrowed, only runtime checking enforces -safety around mutably aliased rust variables the Python interpreter can +safety around mutably aliased Rust variables the Python interpreter can access. We believe that it would require adding an `unsafe impl` for `Send` or `Sync` to trigger this behavior in code using PyO3. Please report any issues related to runtime borrow checker errors on mutable pyclass implementations that @@ -136,8 +148,8 @@ Python::with_gil(|py| { // stand-in for something that executes arbitrary Python code let d = PyDict::new(py); d.set_item(PyNone::get(py), PyNone::get(py)).unwrap(); - // we're not executing Python code while holding the lock, so GILProtected - // was never needed + // as with any `Mutex` usage, lock the mutex for as little time as possible + // in this case, we do it just while pushing into the `Vec` OBJECTS.lock().unwrap().push(d.unbind()); }); # } @@ -148,6 +160,6 @@ need to use conditional compilation to use `GILProtected` on GIL-enabled Python builds and mutexes otherwise. If your use of `GILProtected` does not guard the execution of arbitrary Python code or use of the CPython C API, then conditional compilation is likely unnecessary since `GILProtected` was not needed in the -first place and instead rust mutexes or atomics should be preferred. Python 3.13 -introduces `PyMutex`, which releases the GIL while the lock is held, so that is -another option if you only need to support newer Python versions. +first place and instead Rust mutexes or atomics should be preferred. Python 3.13 +introduces `PyMutex`, which releases the GIL while waiting for the lock, so +that is another option if you only need to support newer Python versions. 
From 38e910f7a1ed7040a153ad6c61db5c1d0dbd9dec Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Fri, 18 Oct 2024 11:57:31 -0600 Subject: [PATCH 08/10] apply suggestions from code review --- guide/src/free-threading.md | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/guide/src/free-threading.md b/guide/src/free-threading.md index 0f172ec7787..8a5d1c0b304 100644 --- a/guide/src/free-threading.md +++ b/guide/src/free-threading.md @@ -15,16 +15,18 @@ for porting C extensions, and [PEP 703](https://peps.python.org/pep-0703/), which provides the technical background for the free-threading implementation in CPython. -The main benefit for supporting free-threaded Python is that both pure Python -code and code interacting with Python via the CPython C API can run -simultaneously on multiple OS threads. This means it's much more -straightforward to achieve multithreaded parallelism in the Python layer, using -the `threading` module, without any need to ensure the GIL is released during -compute-heavy tasks, or any worries about [Amdahl's -law](https://en.wikipedia.org/wiki/Amdahl%27s_law) limiting parallel -performance. If you have ever needed to use `multiprocessing` to achieve a -speedup for some Python code, free-threading will likely allow the use of Python -threads instead for the same workflow. +In the GIL-enabled build, the global interpreter lock serializes access to the +Python runtime. The GIL is therefore a fundamental limitation to parallel +scaling of multithreaded Python workflows, due to [Amdahl's +law](https://en.wikipedia.org/wiki/Amdahl%27s_law), because any time spent +executing a parallel processing task on only one execution context fundamentally +cannot be sped up using parallelism. + +The free-threaded build removes this limit on multithreaded Python scaling. This +means it's much more straightforward to achieve parallelism using the Python +`threading` module. 
If you have ever needed to use `multiprocessing` to achieve +a parallel speedup for some Python code, free-threading will likely allow the +use of Python threads instead for the same workflow. PyO3's support for free-threaded Python will enable authoring native Python extensions that are thread-safe by construction, with much stronger safety @@ -109,7 +111,7 @@ needed. `GILProtected` is a PyO3 type that allows mutable access to static data by leveraging the GIL to lock concurrent access from other threads. In free-threaded Python there is no GIL, so you will need to replace this type with -some other form of locking. In many cases, a type from `std::sync::Atomic` or +some other form of locking. In many cases, a type from `std::sync::atomic` or a `std::sync::Mutex` will be sufficient. Before: From c167b344dfeb0f224336942391786cf6d44755e5 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Fri, 18 Oct 2024 14:58:37 -0600 Subject: [PATCH 09/10] add code example to illustrate runtime borrow panics --- guide/src/free-threading.md | 79 +++++++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 17 deletions(-) diff --git a/guide/src/free-threading.md b/guide/src/free-threading.md index 8a5d1c0b304..7f43f265490 100644 --- a/guide/src/free-threading.md +++ b/guide/src/free-threading.md @@ -83,24 +83,69 @@ garbage collector can only run if all threads are detached from the runtime (in a stop-the-world state), so detaching from the runtime allows freeing unused memory. -## Runtime panics for multithreaded access of mutable `pyclass` instances - -If you wrote code that makes strong assumptions about the GIL protecting shared -mutable state, it may not currently be straightforward to support free-threaded -Python without the risk of runtime mutable borrow panics. 
PyO3 does not lock -access to Python state, so if more than one thread tries to access a Python -object that has already been mutably borrowed, only runtime checking enforces -safety around mutably aliased Rust variables the Python interpreter can -access. We believe that it would require adding an `unsafe impl` for `Send` or -`Sync` to trigger this behavior in code using PyO3. Please report any issues -related to runtime borrow checker errors on mutable pyclass implementations that -do not make strong assumptions about the GIL. - -It was always possible to generate panics like this in PyO3 in code that -releases the GIL with `allow_threads` (see [the docs on interior +## Exceptions and panics for multithreaded access of mutable `pyclass` instances + +Data attached to `pyclass` instances is protected from concurrent access by a +`RefCell`-like pattern of runtime borrow checking. Like a `RefCell`, PyO3 will +raise exceptions (or in some cases panic) to enforce exclusive access for +mutable borrows. It was always possible to generate panics like this in PyO3 in +code that releases the GIL with `allow_threads` or calling a `pymethod` accepting +`&self` from a `&mut self` (see [the docs on interior mutability](./class.md#bound-and-interior-mutability),) but now in free-threaded -Python there are more opportunities to trigger these panics because there is no -GIL. +Python there are more opportunities to trigger these panics from Python because +there is no GIL to lock concurrent access to mutably borrowed data from Python. + +The most straightforward way to trigger this problem is to use the Python +`threading` module to simultaneously call a Rust function that mutably borrows a +`pyclass`. 
For example, consider the following `PyClass` implementation: + +``` +# use python::prelude::*; +# fn main() { +#[pyclass] +#[derive(Default)] +struct ThreadIter { + count: usize, +} + +#[pymethods] +impl ThreadIter { + #[new] + pub fn new() -> Self { + Default::default() + } + + fn __next__(&mut self, py: Python<'_>) -> usize { + self.count += 1; + self.count + } +# } +``` + +And then if we do something like this in Python: + +``` +import concurrent.futures +from my_module import ThreadIter + +i = ThreadIter() + +def increment(): + next(i) + +with concurrent.futures.ThreadPoolExecutor(max_workers=16) as tpe: + futures = [tpe.submit(increment) for _ in range(100)] + [f.result() for f in futures] +``` + +We will see an exception: + +``` +Traceback (most recent call last) + File "example.py", line 5, in + next(i) +RuntimeError: Already borrowed +``` We plan to allow user-selectable semantics for mutable pyclass definitions in PyO3 0.24, allowing some form of opt-in locking to emulate the GIL if that is From 66f09b8bdb2257db734c73ea4b1d597358185b77 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Mon, 21 Oct 2024 09:33:55 -0600 Subject: [PATCH 10/10] fix doctests --- guide/src/free-threading.md | 7 ++++--- src/lib.rs | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/guide/src/free-threading.md b/guide/src/free-threading.md index 7f43f265490..77b2ff327a2 100644 --- a/guide/src/free-threading.md +++ b/guide/src/free-threading.md @@ -100,7 +100,7 @@ The most straightforward way to trigger this problem to use the Python `pyclass`. 
For example, consider the following `PyClass` implementation: ``` -# use python::prelude::*; +# use pyo3::prelude::*; # fn main() { #[pyclass] #[derive(Default)] @@ -119,12 +119,13 @@ impl ThreadIter { self.count += 1; self.count } +} # } ``` And then if we do something like this in Python: -``` +```python import concurrent.futures from my_module import ThreadIter @@ -140,7 +141,7 @@ with concurrent.futures.ThreadPoolExecutor(max_workers=16) as tpe: We will see an exception: -``` +```text Traceback (most recent call last) File "example.py", line 5, in next(i) diff --git a/src/lib.rs b/src/lib.rs index 7de32ca264f..247b42ac372 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -501,6 +501,7 @@ pub mod doc_test { "guide/src/exception.md" => guide_exception_md, "guide/src/faq.md" => guide_faq_md, "guide/src/features.md" => guide_features_md, + "guide/src/free-threading.md" => guide_free_threading_md, "guide/src/function.md" => guide_function_md, "guide/src/function/error-handling.md" => guide_function_error_handling_md, "guide/src/function/signature.md" => guide_function_signature_md,