From d81d22689ee8d6da348e5cdf8c90d9a81b18e510 Mon Sep 17 00:00:00 2001
From: Matt Graham <matthew.m.graham@gmail.com>
Date: Mon, 7 Aug 2023 19:38:31 +0100
Subject: [PATCH] Improve docstring formatting

---
 docs/conf.py             | 33 +++++++++++++-
 src/mici/interop.py      | 93 +++++++++++++++++++++-------------------
 src/mici/matrices.py     |  8 +++-
 src/mici/progressbars.py |  4 +-
 src/mici/solvers.py      | 65 +++++++++++++++++++++++-----
 src/mici/stagers.py      | 35 +++++++--------
 src/mici/systems.py      | 60 +++++++++++++++++++++++++-
 7 files changed, 222 insertions(+), 76 deletions(-)
diff --git a/docs/conf.py b/docs/conf.py
index 1529d4f..0899107 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -6,6 +6,8 @@
 # -- Project information -----------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
 
+import collections
+
 project = "Mici"
 copyright = "2023, Matt Graham"
 author = "Matt Graham"
@@ -22,15 +24,19 @@
 ]
 
 napoleon_preprocess_types = True
+autoclass_content = "both"
 python_use_unqualified_type_names = True
 
 autodoc_typehints = "description"
 
-autodoc_default_options = {"inherited-members": True}
+autodoc_default_options = {"inherited-members": True, "special-members": "__call__"}
 
 intersphinx_mapping = {
     "python": ("https://docs.python.org/3/", None),
     "numpy": ("https://numpy.org/doc/stable/", None),
+    "pymc3": ("https://www.pymc.io/projects/docs/en/v3/", None),
+    "arviz": ("https://python.arviz.org/en/stable/", None),
+    "pystan": ("https://pystan.readthedocs.io/en/latest/", None)
 }
 
 templates_path = ["_templates"]
@@ -44,3 +50,28 @@
 html_static_path = ["_static"]
 html_logo = "_static/mici-logo-rectangular.svg"
 html_theme_options = {"logo_only": True}
+
+
+# -- Post process ------------------------------------------------------------
+
+# Remove duplicated NamedTuple attribute docstrings using fix suggested at
+# https://stackoverflow.com/a/70459782 and also remove default constructor
+# docstring for NamedTuple instances
+
+def remove_namedtuple_attrib_docstring(app, what, name, obj, skip, options):
+    if isinstance(obj, collections._tuplegetter) or str(obj) in {
+        "<method 'count' of 'tuple' objects>", "<method 'index' of 'tuple' objects>"
+    }:
+        return True
+    return skip
+
+
+def remove_namedtuple_constructor_lines(app, what, name, obj, options, lines):
+    if isinstance(obj, type) and issubclass(obj, tuple):
+        if "Create new instance of" in lines[0]:
+            lines.clear()
+
+
+def setup(app):
+    app.connect("autodoc-skip-member", remove_namedtuple_attrib_docstring)
+    app.connect("autodoc-process-docstring", remove_namedtuple_constructor_lines)
diff --git a/src/mici/interop.py b/src/mici/interop.py
index a5fb037..0a77b90 100644
--- a/src/mici/interop.py
+++ b/src/mici/interop.py
@@ -20,14 +20,15 @@ def convert_to_inference_data(
     energy_key: Optional[str] = "energy",
     lp_key: Optional[str] = "lp",
 ) -> arviz.InferenceData:
-    """Convert Mici `sample_chains` output to ArviZ `InferenceData` object.
+    """Convert Mici :code:`sample_chains` output to :py:class:`arviz.InferenceData` object.
 
     Args:
-        traces: Traces output from Mici `sample_chains` call. A dictionary of variables
-            traced over sampled chains with the dictionary keys the variable names and
-            the values a list of arrays, one array per sampled chain, with the first
-            array dimension corresponding to the draw index and any remaining
-            dimensions, the variable dimensions.
+        traces: Traces output from Mici 
+            :py:meth:`mici.samplers.MarkovChainMonteCarloMethod.sample_chains` call. A
+            dictionary of variables traced over sampled chains with the dictionary keys
+            the variable names and the values a list of arrays, one array per sampled
+            chain, with the first array dimension corresponding to the draw index and
+            any remaining dimensions, the variable dimensions.
         stats: Statistics output from Mici `sample_chains` call. A dictionary of chain
             statistics traced over sampled chains with the dictionary keys the
             statistics names and the values a list of arrays, one array per sampled
@@ -72,7 +73,7 @@ def sample_pymc3_model(
     jitter_max_retries: int = 10,
     trace: Optional[list] = None,
     return_inferencedata: bool = False,
-    model: Optional[pymc3.Model] = None,
+    model: Optional[pymc3.model.Model] = None,
     target_accept: float = 0.8,
     max_treedepth: int = 10,
 ) -> Union[arviz.InferenceData, dict[str, ArrayLike]]:
@@ -80,59 +81,65 @@ def sample_pymc3_model(
 
     Uses dynamic multinomial HMC algorithm in Mici with adaptive warm-up phase.
 
-    This function replicates the interface of the `pymc3.sample` function to allow using
-    as a (partial) drop-in replacement.
+    This function replicates the interface of the :py:func:`pymc3.sampling.sample`
+    function to allow using as a (partial) drop-in replacement.
 
     Args:
         draws: The number of samples to draw.
         tune: Number of iterations to tune, defaults to 1000. Samplers adjust the step
-            sizes, scalings or  similar during tuning. Tuning samples will be drawn in
+            sizes, scalings or similar during tuning. Tuning samples will be drawn in
             addition to the number specified in the `draws` argument, and will be
             discarded.
         chains: The number of chains to sample. Running independent chains is important
             for some convergence statistics and can also reveal multiple modes in the
-            posterior. If `None`, then set to either `cores` or 2, whichever is larger.
-        cores: The number of chains to run in parallel. If `None`, set to the number of
-            CPUs in the system, but at most 4.
-        random_seed: Seed for Numpy random number generator used for generating random
-            variables while sampling chains. If `None` then generator will be seeded
-            with entropy from operating system.
+            posterior. If :code::`None`, then set to either :code:`cores` or 2,
+            whichever is larger.
+        cores: The number of chains to run in parallel. If :code:`None`, set to the
+            number of CPU cores in the system, but at most 4.
+        random_seed: Seed for NumPy random number generator used for generating random
+            variables while sampling chains. If :code:`None` then generator will be
+            seeded with entropy from operating system.
         progressbar: Whether or not to display a progress bar.
         init: Initialization method to use. One of:
 
-            1. adapt_diag: Start with a identity mass matrix and then adapt a
-                diagonal based on the variance of the tuning samples. All chains use the
-                test value (usually the prior mean) as starting point.
-            2. jitter+adapt_diag: Same as ``adapt_diag``, but add uniform jitter in
-                [-1, 1] to the starting point in each chain. Also chosen if
-                `init=="auto"`.
-            3.  adapt_full: Adapt a dense mass matrix using the sample covariances
+            * :code:`"adapt_diag"`: Start with a identity mass matrix and then adapt a
+              diagonal based on the variance of the tuning samples. All chains use the
+              test value (usually the prior mean) as starting point.
+            * :code:`jitter+adapt_diag`: Same as :code:`"adapt_diag"`, but add uniform
+              jitter in [-1, 1] to the starting point in each chain. Also chosen if
+              :code:`init="auto"`.
+            * :code:`"adapt_full"`: Adapt a dense mass matrix using the sample
+              covariances.
 
         jitter_max_retries: Maximum number of repeated attempts (per chain) at creating
-            an initial matrix with uniform jitter that yields a finite probability.
-            This applies to `jitter+adapt_diag` and `jitter+adapt_full` init methods.
-        trace: A list of variables to track. If `None`, `model.unobserved_RVs` is used.
-        return_inferencedata: Whether to return the traces as an `arviz.InferenceData`
-            (`True`) object or a dict (`False`).
-        model: PyMC3 model defining posterior distribution to sample from. May be `None`
-            if function is called from within model context manager.
+            an initial matrix with uniform jitter that yields a finite probability. This
+            applies to `"jitter+adapt_diag"` and :code:`"jitter+adapt_full"`
+            :py:obj:`init` methods.
+        trace: A list of variables to track. If :code`None`, then
+            :code:`model.unobserved_RVs` is used.
+        return_inferencedata: Whether to return the traces as an
+            :py:class:`arviz.InferenceData` (:code:`True`) object or a
+            :py:class:`dict` (:code:`False`).
+        model: PyMC3 model defining posterior distribution to sample from. May be 
+            :code:`None` if function is called from within model context manager.
         target_accept: Target value for the acceptance statistic being controlled during
             adaptive warm-up.
         max_treedepth: Maximum depth to expand trajectory binary tree to in integrator
             transition. The maximum number of integrator steps corresponds to
-            `2**max_treedepth`.
+            :code:`2**max_treedepth`.
 
     Returns:
-        A dictionary or ArviZ `InferenceData` object containing the sampled chain
-        output. Dictionary output (when `return_inferencedata=False`) has string keys
-        corresponding to the name of each traced variable in the model, with the values
-        being the corresponding values of the variables traced across the chains as
-        NumPy arrays, with the first dimension the chain index  (of size equal to
-        `chains`), the second dimension the draw index (of size equal to `draws`) and
-        any remaining dimensions corresponding to the dimensions of the traced variable.
-        If `return_inferencedata=True` an ArviZ `InferenceData` object is instead
-        returned with traced chain data stored in the `posterior` group and additional
-        chain statistics in the `sample_stats` group.
+        A dictionary or :py:class:`arviz.InferenceData` object containing the sampled
+        chain output. Dictionary output (when :code:`return_inferencedata=False`) has
+        string keys corresponding to the name of each traced variable in the model, with
+        the values being the corresponding values of the variables traced across the
+        chains as NumPy arrays, with the first dimension the chain index  (of size equal
+        to :code:`chains`), the second dimension the draw index (of size equal to
+        :code:`draws`) and any remaining dimensions corresponding to the dimensions of
+        the traced variable. If :code:`return_inferencedata=True` an
+        :py:class:`arviz.InferenceData` object is instead returned with traced chain
+        data stored in the :code:`posterior` group and additional chain statistics in
+        the :code:`sample_stats` group.
     """
 
     import pymc3
@@ -282,8 +289,8 @@ def sample_stan_model(
     Uses dynamic multinomial HMC algorithm in Mici with adaptive warm-up phase.
 
     This function follows a similar argument naming scheme to the PyStan
-    `stan.Model.sample` method (which itself follows CmdStan) to allow using as a
-    (partial) drop-in replacement.
+    :py:meth:`stan.model.Model.sample` method (which itself follows CmdStan) to allow
+    using as a (partial) drop-in replacement.
 
     Args:
         model_code: Stan program code describing a Stan model.
diff --git a/src/mici/matrices.py b/src/mici/matrices.py
index b06b8bb..b620e87 100644
--- a/src/mici/matrices.py
+++ b/src/mici/matrices.py
@@ -1931,8 +1931,10 @@ def __init__(
                 `(dim_inner, dim_inner)` specifying inner term in matrix product
                 defining low-rank update. If `None` an identity matrix is used.
             capacitance_matrix: Square matrix equal to
+
                     inner_square_matrix.inv
                     + right_factor_matrix @ square_matrix.inv @ left_factor_matrix
+
                 and with shape `(dim_inner, dim_inner)` which is used in constructing
                 inverse and computation of determinant of the low-rank updated matrix,
                 with this argument optional and typically only passed when this matrix
@@ -2122,8 +2124,10 @@ def __init__(
                 `(dim_inner, dim_inner)` specifying inner term in matrix product
                 defining low-rank update. If `None` an identity matrix is used.
             capacitance_matrix: Symmetric matrix  equal to
+
                     inner_symmetric_matrix.inv
                     + factor_matrix.T @  symmetric_matrix.inv @ factor_matrix
+
                 and with shape `(dim_inner, dim_inner)` which is used in constructing
                 inverse and computation of determinant of the low-rank updated matrix,
                 with this argument optional and typically only passed when this matrix
@@ -2240,9 +2244,11 @@ def __init__(
             inner_pos_def_matrix: Optional positive definite matrix with shape
                 `(dim_inner, dim_inner)` specifying inner term in matrix product
                 defining low-rank update. If `None` an identity matrix is used.
-            capacitance_matrix: Positive- definite matrix equal to
+            capacitance_matrix: Positive-definite matrix equal to
+
                     inner_pos_def_matrix.inv
                     + factor_matrix.T @ pos_def_matrix.inv @ factor_matrix
+
                 and with shape `(dim_inner, dim_inner)` which is used in constructing
                 inverse and computation of determinant of the low-rank updated matrix,
                 with this argument optional and typically only passed when this matrix
diff --git a/src/mici/progressbars.py b/src/mici/progressbars.py
index 017eea2..817c305 100644
--- a/src/mici/progressbars.py
+++ b/src/mici/progressbars.py
@@ -198,8 +198,8 @@ def __init__(
     ):
         """
         Args:
-            sequence: Sequence to iterate over. Must be iterable _and_ have a defined
-            length such that `len(sequence)` is valid.
+            sequence: Sequence to iterate over. Must be iterable **and** have a defined
+                length such that `len(sequence)` is valid.
             description: Description of task to prefix progress bar with.
             position: Tuple specifying position of progress bar within a sequence with
                 first entry corresponding to zero-indexed position and the second entry
diff --git a/src/mici/solvers.py b/src/mici/solvers.py
index 0a0949c..f5c4df2 100644
--- a/src/mici/solvers.py
+++ b/src/mici/solvers.py
@@ -26,7 +26,15 @@ class FixedPointSolver(Protocol):
     """Solver for fixed point equation, returning `x` such that `func(x) = x`."""
 
     def __call__(self, func: ArrayFunction, x0: ArrayLike, **kwargs) -> ArrayLike:
-        ...
+        """Solve fixed point equation.
+
+        Args:
+            func: Function to solve for fixed point of.
+            x0: Point to initialize solver at.
+
+        Returns:
+            Fixed point solved for.
+        """
 
 
 def solve_fixed_point_direct(
@@ -53,8 +61,8 @@ def solve_fixed_point_direct(
         Solution to fixed point equation with `norm(func(x) - x) < convergence_tol`.
 
     Raises:
-        `mici.errors.ConvergenceError` if solver does not converge within `max_iters`
-        iterations, diverges or encounters a `ValueError` during the iteration.
+        mici.errors.ConvergenceError: If solver does not converge within `max_iters`
+            iterations, diverges or encounters a `ValueError` during the iteration.
     """
     for i in range(max_iters):
         try:
@@ -108,8 +116,8 @@ def solve_fixed_point_steffensen(
         Solution to fixed point equation with `norm(func(x) - x) < convergence_tol`.
 
     Raises:
-        `mici.errors.ConvergenceError` if solver does not converge within `max_iters`
-        iterations, diverges or encounters a `ValueError` during the iteration.
+        mici.errors.ConvergenceError: If solver does not converge within `max_iters`
+            iterations, diverges or encounters a `ValueError` during the iteration.
     """
     for i in range(max_iters):
         try:
@@ -144,6 +152,8 @@ class ProjectionSolver(Protocol):
 
     Solves an equation of the form
 
+    .. code::
+
         r(λ) = c(Φ₂,₁(q, p + ∂c(q)ᵀλ)) = 0
 
     for the vector of Lagrange multipliers `λ` to project a point on to the manifold
@@ -160,6 +170,17 @@ def __call__(
         system: System,
         **kwargs,
     ) -> ChainState:
+        """Solve for projection on to manifold step.
+
+        Args:
+            state: Current chain state after unconstrained step.
+            state_prev: Previous chain state on manifold.
+            time_step: Integrator time step for unconstrained step.
+            system: Hamiltonian system dynamics are being simulated for.
+
+        Returns:
+            Chain state after projection on to manifold.
+        """
         ...
 
 
@@ -181,6 +202,8 @@ def solve_projection_onto_manifold_quasi_newton(
 
     Solves an equation of the form
 
+    .. code::
+
         r(λ) = c(Φ₂,₁(q, p + ∂c(q)ᵀλ)) = 0
 
     for the vector of Lagrange multipliers `λ` to project a point on to the manifold
@@ -190,11 +213,15 @@ def solve_projection_onto_manifold_quasi_newton(
 
     The Jacobian of the residual function `r` is
 
+    .. code::
+
         ∂r(λ) = ∂c(Φ₂,₁(q, p + ∂c(q)ᵀλ)) ∂₂Φ₂,₁ ∂c(q)ᵀ
 
     where `∂₂Φ₂,₁` is the Jacobian of the (linear) flow-map `Φ₂,₁` with respect to the
     second (momentum argument), such that the full Newton update is
 
+    .. code::
+
         λ_(α) = λ - ∂r(λ)⁻¹ r(λ)
 
     which requires evaluating `∂c` (`system.jacob_constr`) on each iteration and solving
@@ -202,10 +229,14 @@ def solve_projection_onto_manifold_quasi_newton(
 
     The symmetric quasi-Newton iteration instead uses the approximation
 
+    .. code::
+
         ∂c(Φ₂,₁(q, p + ∂c(q)ᵀλ)) ∂₂Φ₂,₁ ∂c(q)ᵀ ≈ ∂c(q) ∂₂Φ₂,₁ ∂c(q)ᵀ
 
     with the corresponding update
 
+    .. code::
+
         λ = λ - (∂c(q) ∂₂Φ₂,₁ ∂c(q)ᵀ)⁻¹ r(λ)
 
     allowing a previously computed decomposition of the matrix `∂c(q) ∂₂Φ₂,₁ ∂c(q)ᵀ` to
@@ -237,8 +268,8 @@ def solve_projection_onto_manifold_quasi_newton(
         within `constraint_tol`, i.e. `norm(system.constr(state.pos)) < constraint_tol`.
 
     Raises:
-        `mici.errors.ConvergenceError` if solver does not converge within `max_iters`
-        iterations, diverges or encounters a `ValueError` during the iteration.
+        mici.errors.ConvergenceError: If solver does not converge within `max_iters`
+            iterations, diverges or encounters a `ValueError` during the iteration.
     """
     mu = np.zeros_like(state.pos)
     jacob_constr_prev = system.jacob_constr(state_prev)
@@ -294,6 +325,8 @@ def solve_projection_onto_manifold_newton(
 
     Solves an equation of the form
 
+    .. code::
+
         r(λ) = c(Φ₂,₁(q, p + ∂c(q)ᵀλ)) = 0
 
     for the vector of Lagrange multipliers `λ` to project a point on to the manifold
@@ -303,11 +336,15 @@ def solve_projection_onto_manifold_newton(
 
     The Jacobian of the residual function `r` is
 
+    .. code::
+
         ∂r(λ) = ∂c(Φ₂,₁(q, p + ∂c(q)ᵀλ)) ∂₂Φ₂,₁ ∂c(q)ᵀ
 
     where `∂₂Φ₂,₁` is the Jacobian of the (linear) flow-map `Φ₂,₁` with respect to the
     second (momentum argument), such that the Newton update is
 
+    .. code::
+
         λ = λ - ∂r(λ)⁻¹ r(λ)
 
     which requires evaluating `∂c` (`system.jacob_constr`) on each iteration and solving
@@ -338,8 +375,8 @@ def solve_projection_onto_manifold_newton(
         within `constraint_tol`, i.e. `norm(system.constr(state.pos)) < constraint_tol`.
 
     Raises:
-        `mici.errors.ConvergenceError` if solver does not converge within `max_iters`
-        iterations, diverges or encounters a `ValueError` during the iteration.
+        mici.errors.ConvergenceError: If solver does not converge within `max_iters`
+            iterations, diverges or encounters a `ValueError` during the iteration.
     """
     mu = np.zeros_like(state.pos)
     jacob_constr_prev = system.jacob_constr(state_prev)
@@ -399,6 +436,8 @@ def solve_projection_onto_manifold_newton_with_line_search(
 
     Solves an equation of the form
 
+    .. code::
+
         r(λ) = c(Φ₂,₁(q, p + ∂c(q)ᵀλ)) = 0
 
     for the vector of Lagrange multipliers `λ` to project a point on to the manifold
@@ -408,11 +447,15 @@ def solve_projection_onto_manifold_newton_with_line_search(
 
     The Jacobian of the residual function `r` is
 
+    .. code::
+
         ∂r(λ) = ∂c(Φ₂,₁(q, p + ∂c(q)ᵀλ)) ∂₂Φ₂,₁ ∂c(q)ᵀ
 
     where `∂₂Φ₂,₁` is the Jacobian of the (linear) flow-map `Φ₂,₁` with respect to the
     second (momentum argument), such that the scaled Newton update is
 
+    .. code::
+
         λ_(α) = λ - α * ∂r(λ)⁻¹ r(λ)
 
     which requires evaluating `∂c` (`system.jacob_constr`) on each iteration and solving
@@ -449,8 +492,8 @@ def solve_projection_onto_manifold_newton_with_line_search(
         within `constraint_tol`, i.e. `norm(system.constr(state.pos)) < constraint_tol`.
 
     Raises:
-        `mici.errors.ConvergenceError` if solver does not converge within `max_iters`
-        iterations, diverges or encounters a `ValueError` during the iteration.
+        mici.errors.ConvergenceError: If solver does not converge within `max_iters`
+            iterations, diverges or encounters a `ValueError` during the iteration.
     """
     mu = np.zeros_like(state.pos)
     jacob_constr_prev = system.jacob_constr(state_prev)
diff --git a/src/mici/stagers.py b/src/mici/stagers.py
index bdef429..3f1dfb4 100644
--- a/src/mici/stagers.py
+++ b/src/mici/stagers.py
@@ -10,22 +10,23 @@
     from numpy.typing import ArrayLike
     from mici.adapters import Adapter
     from mici.states import ChainState
+    from mici.types import TraceFunction
 
 
 class ChainStage(NamedTuple):
-    """Parameters of chain stage."""
+    """Parameters of chain stage.
+    
+    Parameters:
+        n_iter: Number of iterations in chain stage.
+        adapters: Dictionary of adapters to apply to each transition in chain stage.
+        trace_funcs: Functions defining chain variables to trace during chain stage.
+        record_stats: Whether to record statistics and traces during chain stage.
+    """
 
     n_iter: int
-    """Number of iterations in chain stage."""
-
     adapters: dict[str, Iterable[Adapter]]
-    """Dictionary of adapters to apply to each transition in chain stage."""
-
-    trace_funcs: Iterable[Callable[[ChainState], dict[str, ArrayLike]]]
-    """"""
-
+    trace_funcs: tuple[TraceFunction]
     record_stats: bool
-    """Whether to record statistics and traces during chain stage."""
 
 
 class Stager(abc.ABC):
@@ -37,7 +38,7 @@ def stages(
         n_warm_up_iter: int,
         n_main_iter: int,
         adapters: dict[str, Iterable[Adapter]],
-        trace_funcs: Iterable[Callable[[ChainState], dict[str, ArrayLike]]],
+        trace_funcs: Iterable[TraceFunction],
         trace_warm_up: bool = False,
     ) -> dict[str, ChainStage]:
         """Create dictionary specifying labels and parameters of chain sampling stages.
@@ -106,7 +107,7 @@ def stages(
             sampling_stages["Adaptive warm up"] = ChainStage(
                 n_iter=n_warm_up_iter,
                 adapters=adapters,
-                trace_funcs=warm_up_trace_funcs,
+                trace_funcs=tuple(warm_up_trace_funcs),
                 record_stats=trace_warm_up,
             )
         # main non-adaptive stage
@@ -114,7 +115,7 @@ def stages(
             sampling_stages["Main non-adaptive"] = ChainStage(
                 n_iter=n_main_iter,
                 adapters=None,
-                trace_funcs=trace_funcs,
+                trace_funcs=tuple(trace_funcs),
                 record_stats=True,
             )
         return sampling_stages
@@ -191,7 +192,7 @@ def stages(
         n_warm_up_iter: int,
         n_main_iter: int,
         adapters: dict[str, Iterable[Adapter]],
-        trace_funcs: Iterable[Callable[[ChainState], dict[str, ArrayLike]]],
+        trace_funcs: Iterable[TraceFunction],
         trace_warm_up: bool = False,
     ) -> dict[str, ChainStage]:
         fast_adapters = {
@@ -221,7 +222,7 @@ def stages(
             sampling_stages["Initial fast adaptive"] = ChainStage(
                 n_iter=n_init_fast_stage_iter,
                 adapters=fast_adapters,
-                trace_funcs=warm_up_trace_funcs,
+                trace_funcs=tuple(warm_up_trace_funcs),
                 record_stats=record_stats,
             )
             # growing size slow adaptation windows
@@ -250,14 +251,14 @@ def stages(
                 ] = ChainStage(
                     n_iter=n_iter,
                     adapters=adapters,
-                    trace_funcs=warm_up_trace_funcs,
+                    trace_funcs=tuple(warm_up_trace_funcs),
                     record_stats=record_stats,
                 )
             # final fast adaptation stage
             sampling_stages["Final fast adaptive"] = ChainStage(
                 n_iter=n_final_fast_stage_iter,
                 adapters=fast_adapters,
-                trace_funcs=warm_up_trace_funcs,
+                trace_funcs=tuple(warm_up_trace_funcs),
                 record_stats=record_stats,
             )
         # main non-adaptive stage
@@ -265,7 +266,7 @@ def stages(
             sampling_stages["Main non-adaptive"] = ChainStage(
                 n_iter=n_main_iter,
                 adapters=None,
-                trace_funcs=trace_funcs,
+                trace_funcs=tuple(trace_funcs),
                 record_stats=True,
             )
         return sampling_stages
diff --git a/src/mici/systems.py b/src/mici/systems.py
index 8933667..2a51635 100644
--- a/src/mici/systems.py
+++ b/src/mici/systems.py
@@ -532,6 +532,8 @@ def __init__(
                 the ratio of the prior density (specified by `neg_log_dens`) and the
                 square-root of the determinant of the Gram matrix defined by
 
+                .. code-block::
+
                     gram(q) = jacob_constr(q) @ inv(metric) @ jacob_constr(q).T
 
                 where `jacob_constr` is the Jacobian of the constraint function `constr`
@@ -574,6 +576,8 @@ def __init__(
                 constraint function `c = constr(q)` with respect to the position array
                 argument `q`, returning the computed Jacobian as a 2D array `jacob` with
 
+               .. code-block::
+
                     jacob[i, j] = ∂c[i] / ∂q[j]
 
                 Optionally the function may instead return a 2-tuple of values with the
@@ -649,6 +653,8 @@ def gram(self, state: ChainState) -> matrices.PositiveDefiniteMatrix:
 
         The Gram matrix as a position `q` is defined as
 
+        .. code-block::
+
             gram(q) = jacob_constr(q) @ inv(metric) @ jacob_constr(q).T
 
         where `jacob_constr` is the Jacobian of the constraint function `constr` and
@@ -763,6 +769,8 @@ def __init__(
                 with respect to the Hausdorff measure on the manifold corresponding to
                 the ratio of the prior density (specified by `neg_log_dens`) and the
                 square-root of the determinant of the Gram matrix defined by
+                
+               .. code-block::
 
                     gram(q) = jacob_constr(q) @ inv(metric) @ jacob_constr(q).T
 
@@ -805,6 +813,8 @@ def __init__(
                 (matrix / 2D array of partial derivatives) of the output of the
                 constraint function `c = constr(q)` with respect to the position array
                 argument `q`, returning the computed Jacobian as a 2D array `jacob` with
+                
+               .. code-block::
 
                     jacob[i, j] = ∂c[i] / ∂q[j]
 
@@ -819,12 +829,16 @@ def __init__(
                 *matrix-Hessian-product* (MHP) of the constraint function `constr` with
                 respect to the position array argument. The MHP is here defined as a
                 function of a `(dim_constr, dim_pos)` shaped 2D array `m`
+                
+               .. code-block::
 
                     mhp(m) = sum(m[:, :, None] * hess[:, :, :], axis=(0, 1))
 
                 where `hess` is the `(dim_constr, dim_pos, dim_pos)` shaped
                 vector-Hessian of `c = constr(q)` with respect to `q` i.e. the array of
                 second-order partial derivatives of such that
+                
+               .. code-block::
 
                     hess[i, j, k] = ∂²c[i] / (∂q[j] ∂q[k])
 
@@ -911,6 +925,8 @@ def __init__(
                 manifold corresponding to the ratio of the prior density (specified by
                 `neg_log_dens`) and the square-root of the determinant of the Gram
                 matrix defined by
+                
+                .. code-block::
 
                     gram(q) = jacob_constr(q) @ inv(metric) @ jacob_constr(q).T
 
@@ -943,6 +959,8 @@ def __init__(
                 constraint function `c = constr(q)` with respect to the position array
                 argument `q`, returning the computed Jacobian as a 2D array `jacob` with
 
+                .. code-block::
+
                     jacob[i, j] = ∂c[i] / ∂q[j]
 
                 Optionally the function may instead return a 2-tuple of values with the
@@ -956,6 +974,8 @@ def __init__(
                 *matrix-Hessian-product* (MHP) of the constraint function `constr` with
                 respect to the position array argument. The MHP is here defined as a
                 function of a `(dim_constr, dim_pos)` shaped 2D array `m`
+                
+                .. code-block::
 
                     mhp(m) = sum(m[:, :, None] * hess[:, :, :], axis=(0, 1))
 
@@ -963,6 +983,8 @@ def __init__(
                 vector-Hessian of `c = constr(q)` with respect to `q` i.e. the array of
                 second-order partial derivatives of such that
 
+                .. code-block::
+
                     hess[i, j, k] = ∂²c[i] / (∂q[j] ∂q[k])
 
                 Optionally the function may instead return a 3-tuple of values with the
@@ -1068,6 +1090,8 @@ def __init__(
                 argument used to specify the value of these, if any. Together this means
                 the metric matrix representation at a position `pos` is constructed as
 
+                .. code-block::
+
                     metric = metric_matrix_class(metric_func(pos), **metric_kwargs)
 
                 The `mici.matrices.PositiveDefiniteMatrix` subclass should as a minimum
@@ -1082,16 +1106,20 @@ def __init__(
                 initializer.
             vjp_metric_func: Function which given a position array returns another
                 function which takes an array as an argument and returns the
-                *vector-Jacobian-product* (VJP) of `metric_func` with respect to the
+                **vector-Jacobian-product** (VJP) of `metric_func` with respect to the
                 position array argument. The VJP is here defined as a function of an
                 array `v` (of the same shape as the output of `metric_func`)
                 corresponding to
 
+                .. code-block::
+
                     vjp(v) = sum(v[..., None] * jacob, tuple(range(v.ndim))
 
                 where `jacob` is the Jacobian of `m = metric_func(q)` wrt `q` i.e. the
                 array of partial derivatives of the function such that
 
+                .. code-block::
+
                     jacob[..., i] = ∂m[...] / ∂q[i]
 
                 Optionally the function may instead return a 2-tuple of values with the
@@ -1140,11 +1168,15 @@ def vjp_metric_func(self, state: ChainState) -> VectorJacobianProduct:
         The vector-Jacobian-product is here defined as a function of an array `v` (of
         the same shape as the output of `metric_func`) corresponding to
 
+        .. code-block::
+
             vjp(v) = sum(v[..., None] * jacob, axis=tuple(range(v.ndim))
 
         where `jacob` is the Jacobian of `m = metric_func(q)` wrt `q` i.e. the array of
         partial derivatives of the function such that
 
+        .. code-block::
+
             jacob[..., i] = ∂m[...] / ∂q[i]
 
         Args:
@@ -1237,12 +1269,16 @@ def __init__(
                 the position array argument. The VJP is here defined as a function of a
                 scalar `v`
 
+                .. code-block::
+
                     vjp(v) = v @ grad
 
                 where `grad` is the `(dim_pos,)` shaped Jacobian (gradient) of `s =
                 metric_scalar_func(q)` with respect to `q` i.e. the array of partial
                 derivatives of the function such that
 
+                .. code-block::
+
                     grad[i] = ∂s / ∂q[i]
 
                 Optionally the function may instead return a 2-tuple of values with the
@@ -1313,12 +1349,16 @@ def __init__(
                 to the position array argument. The VJP is here defined as a function of
                 a 1D array `v`
 
+                .. code-block::
+
                     vjp(v) = sum(v[:, None] * jacob[:, :], axis=0)
 
                 where `jacob` is the `(dim_pos, dim_pos)` shaped Jacobian of `d =
                 metric_diagonal_func(q)` with respect to `q` i.e. the array of partial
                 derivatives of the function such that
 
+                .. code-block::
+
                     jacob[i, j] = ∂d[i] / ∂q[j]
 
                 Optionally the function may instead return a 2-tuple of values with the
@@ -1382,12 +1422,16 @@ def __init__(
                 the position array argument. The VJP is here defined as a function of a
                 2D array `v`
 
+                .. code-block::
+
                     vjp(v) = sum(v[:, :, None] * jacob[:, :, :], axis=(0, 1))
 
                 where `jacob` is the `(dim_pos, dim_pos, dim_pos)` shaped Jacobian of `L
                 = metric_chol_func(q)` with respect to `q` i.e. the array of partial
                 derivatives of the function such that
 
+                .. code-block::
+
                     jacob[i, j, k] = ∂L[i, j] / ∂q[k]
 
                 Optionally the function may instead return a 2-tuple of values with the
@@ -1452,12 +1496,16 @@ def __init__(
                 *vector-Jacobian-product* (VJP) of `metric_func` with respect to the
                 position array argument. The VJP is here defined as a function of a 2D
                 array `v`
+                
+                .. code-block::
 
                     vjp(v) = sum(v[:, :, None] * jacob[:, :, :], axis=(0, 1))
 
                 where `jacob` is the `(dim_pos, dim_pos, dim_pos)` shaped Jacobian of `M
                 = metric_func(q)` with respect to `q` i.e. the array of partial
                 derivatives of the function such that
+                
+                .. code-block::
 
                     jacob[i, j, k] = ∂M[i, j] / ∂q[k]
 
@@ -1503,6 +1551,8 @@ class SoftAbsRiemannianMetricSystem(RiemannianMetricSystem):
     1D array of real eigenvalues, and `eigvec` the corresponding 2D array (orthogonal
     matrix) with eigenvectors as columns, then the resulting positive-definite metric
     matrix representation `M` is computed as
+    
+    .. code-block::
 
         M = eigvec @ diag(softabs(eigval, softabs_coeff)) @ eigvec.T
 
@@ -1560,12 +1610,16 @@ def __init__(
                 *matrix-Tressian-product* (MTP) of `neg_log_dens` with respect to the
                 position array argument. The MTP is here defined as a function of a
                 matrix `m` corresponding to
+                
+                .. code-block::
 
                     mtp(m) = sum(m[:, :, None] * tress[:, :, :], axis=(0, 1))
 
                 where `tress` is the 'Tressian' of `f = neg_log_dens(q)` wrt `q` i.e.
                 the 3D array of third-order partial derivatives of the scalar-valued
                 function such that
+                
+                .. code-block::
 
                     tress[i, j, k] = ∂³f / (∂q[i] ∂q[j] ∂q[k])
 
@@ -1628,12 +1682,16 @@ def mtp_neg_log_dens(self, state: ChainState) -> MatrixTressianProduct:
 
         The matrix-Tressian-product (MTP) is here defined as a function of a matrix `m`
         corresponding to
+        
+        .. code-block::
 
             mtp(m) = sum(m[:, :, None] * tress[:, :, :], axis=(0, 1))
 
         where `tress` is the 'Tressian' of `f = neg_log_dens(q)` with respect to `q =
         state.pos` i.e. the 3D array of third-order partial derivatives of the
         scalar-valued function such that
+        
+        .. code-block::
 
             tress[i, j, k] = ∂³f / (∂q[i] ∂q[j] ∂q[k])