From d81d22689ee8d6da348e5cdf8c90d9a81b18e510 Mon Sep 17 00:00:00 2001 From: Matt Graham Date: Mon, 7 Aug 2023 19:38:31 +0100 Subject: [PATCH] Improve docstring formatting --- docs/conf.py | 33 +++++++++++++- src/mici/interop.py | 93 +++++++++++++++++++++------------------- src/mici/matrices.py | 8 +++- src/mici/progressbars.py | 4 +- src/mici/solvers.py | 65 +++++++++++++++++++++++----- src/mici/stagers.py | 35 +++++++-------- src/mici/systems.py | 60 +++++++++++++++++++++++++- 7 files changed, 222 insertions(+), 76 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 1529d4f..0899107 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -6,6 +6,8 @@ # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information +import collections + project = "Mici" copyright = "2023, Matt Graham" author = "Matt Graham" @@ -22,15 +24,19 @@ ] napoleon_preprocess_types = True +autoclass_content = "both" python_use_unqualified_type_names = True autodoc_typehints = "description" -autodoc_default_options = {"inherited-members": True} +autodoc_default_options = {"inherited-members": True, "special-members": "__call__"} intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), "numpy": ("https://numpy.org/doc/stable/", None), + "pymc3": ("https://www.pymc.io/projects/docs/en/v3/", None), + "arviz": ("https://python.arviz.org/en/stable/", None), + "pystan": ("https://pystan.readthedocs.io/en/latest/", None) } templates_path = ["_templates"] @@ -44,3 +50,28 @@ html_static_path = ["_static"] html_logo = "_static/mici-logo-rectangular.svg" html_theme_options = {"logo_only": True} + + +# -- Post process ------------------------------------------------------------ + +# Remove duplicated NamedTuple attribute docstrings using fix suggested at +# https://stackoverflow.com/a/70459782 and also remove default constructor +# docstring for NamedTuple instances + +def remove_namedtuple_attrib_docstring(app, what, name, obj, skip, options): + if isinstance(obj, collections._tuplegetter) or str(obj) in { + "", "" + }: + return True + return skip + + +def remove_namedtuple_constructor_lines(app, what, name, obj, options, lines): + if isinstance(obj, type) and issubclass(obj, tuple): + if "Create new instance of" in lines[0]: + lines.clear() + + +def setup(app): + app.connect("autodoc-skip-member", remove_namedtuple_attrib_docstring) + app.connect("autodoc-process-docstring", remove_namedtuple_constructor_lines) diff --git a/src/mici/interop.py b/src/mici/interop.py index a5fb037..0a77b90 100644 --- a/src/mici/interop.py +++ b/src/mici/interop.py @@ -20,14 +20,15 @@ def convert_to_inference_data( energy_key: Optional[str] = "energy", lp_key: Optional[str] = "lp", ) -> arviz.InferenceData: - """Convert Mici `sample_chains` output to ArviZ `InferenceData` object. + """Convert Mici :code:`sample_chains` output to :py:class:`arviz.InferenceData` object. Args: - traces: Traces output from Mici `sample_chains` call. A dictionary of variables - traced over sampled chains with the dictionary keys the variable names and - the values a list of arrays, one array per sampled chain, with the first - array dimension corresponding to the draw index and any remaining - dimensions, the variable dimensions. + traces: Traces output from Mici + :py:meth:`mici.samplers.MarkovChainMonteCarloMethod.sample_chains` call. A + dictionary of variables traced over sampled chains with the dictionary keys + the variable names and the values a list of arrays, one array per sampled + chain, with the first array dimension corresponding to the draw index and + any remaining dimensions, the variable dimensions. stats: Statistics output from Mici `sample_chains` call. A dictionary of chain statistics traced over sampled chains with the dictionary keys the statistics names and the values a list of arrays, one array per sampled @@ -72,7 +73,7 @@ def sample_pymc3_model( jitter_max_retries: int = 10, trace: Optional[list] = None, return_inferencedata: bool = False, - model: Optional[pymc3.Model] = None, + model: Optional[pymc3.model.Model] = None, target_accept: float = 0.8, max_treedepth: int = 10, ) -> Union[arviz.InferenceData, dict[str, ArrayLike]]: @@ -80,59 +81,65 @@ def sample_pymc3_model( Uses dynamic multinomial HMC algorithm in Mici with adaptive warm-up phase. - This function replicates the interface of the `pymc3.sample` function to allow using - as a (partial) drop-in replacement. + This function replicates the interface of the :py:func:`pymc3.sampling.sample` + function to allow using as a (partial) drop-in replacement. Args: draws: The number of samples to draw. tune: Number of iterations to tune, defaults to 1000. Samplers adjust the step - sizes, scalings or similar during tuning. Tuning samples will be drawn in + sizes, scalings or similar during tuning. Tuning samples will be drawn in addition to the number specified in the `draws` argument, and will be discarded. chains: The number of chains to sample. Running independent chains is important for some convergence statistics and can also reveal multiple modes in the - posterior. If `None`, then set to either `cores` or 2, whichever is larger. - cores: The number of chains to run in parallel. If `None`, set to the number of - CPUs in the system, but at most 4. - random_seed: Seed for Numpy random number generator used for generating random - variables while sampling chains. If `None` then generator will be seeded - with entropy from operating system. + posterior. If :code::`None`, then set to either :code:`cores` or 2, + whichever is larger. + cores: The number of chains to run in parallel. If :code:`None`, set to the + number of CPU cores in the system, but at most 4. + random_seed: Seed for NumPy random number generator used for generating random + variables while sampling chains. If :code:`None` then generator will be + seeded with entropy from operating system. progressbar: Whether or not to display a progress bar. init: Initialization method to use. One of: - 1. adapt_diag: Start with a identity mass matrix and then adapt a - diagonal based on the variance of the tuning samples. All chains use the - test value (usually the prior mean) as starting point. - 2. jitter+adapt_diag: Same as ``adapt_diag``, but add uniform jitter in - [-1, 1] to the starting point in each chain. Also chosen if - `init=="auto"`. - 3. adapt_full: Adapt a dense mass matrix using the sample covariances + * :code:`"adapt_diag"`: Start with a identity mass matrix and then adapt a + diagonal based on the variance of the tuning samples. All chains use the + test value (usually the prior mean) as starting point. + * :code:`jitter+adapt_diag`: Same as :code:`"adapt_diag"`, but add uniform + jitter in [-1, 1] to the starting point in each chain. Also chosen if + :code:`init="auto"`. + * :code:`"adapt_full"`: Adapt a dense mass matrix using the sample + covariances. jitter_max_retries: Maximum number of repeated attempts (per chain) at creating - an initial matrix with uniform jitter that yields a finite probability. - This applies to `jitter+adapt_diag` and `jitter+adapt_full` init methods. - trace: A list of variables to track. If `None`, `model.unobserved_RVs` is used. - return_inferencedata: Whether to return the traces as an `arviz.InferenceData` - (`True`) object or a dict (`False`). - model: PyMC3 model defining posterior distribution to sample from. May be `None` - if function is called from within model context manager. + an initial matrix with uniform jitter that yields a finite probability. This + applies to `"jitter+adapt_diag"` and :code:`"jitter+adapt_full"` + :py:obj:`init` methods. + trace: A list of variables to track. If :code`None`, then + :code:`model.unobserved_RVs` is used. + return_inferencedata: Whether to return the traces as an + :py:class:`arviz.InferenceData` (:code:`True`) object or a + :py:class:`dict` (:code:`False`). + model: PyMC3 model defining posterior distribution to sample from. May be + :code:`None` if function is called from within model context manager. target_accept: Target value for the acceptance statistic being controlled during adaptive warm-up. max_treedepth: Maximum depth to expand trajectory binary tree to in integrator transition. The maximum number of integrator steps corresponds to - `2**max_treedepth`. + :code:`2**max_treedepth`. Returns: - A dictionary or ArviZ `InferenceData` object containing the sampled chain - output. Dictionary output (when `return_inferencedata=False`) has string keys - corresponding to the name of each traced variable in the model, with the values - being the corresponding values of the variables traced across the chains as - NumPy arrays, with the first dimension the chain index (of size equal to - `chains`), the second dimension the draw index (of size equal to `draws`) and - any remaining dimensions corresponding to the dimensions of the traced variable. - If `return_inferencedata=True` an ArviZ `InferenceData` object is instead - returned with traced chain data stored in the `posterior` group and additional - chain statistics in the `sample_stats` group. + A dictionary or :py:class:`arviz.InferenceData` object containing the sampled + chain output. Dictionary output (when :code:`return_inferencedata=False`) has + string keys corresponding to the name of each traced variable in the model, with + the values being the corresponding values of the variables traced across the + chains as NumPy arrays, with the first dimension the chain index (of size equal + to :code:`chains`), the second dimension the draw index (of size equal to + :code:`draws`) and any remaining dimensions corresponding to the dimensions of + the traced variable. If :code:`return_inferencedata=True` an + :py:class:`arviz.InferenceData` object is instead returned with traced chain + data stored in the :code:`posterior` group and additional chain statistics in + the :code:`sample_stats` group. """ import pymc3 @@ -282,8 +289,8 @@ def sample_stan_model( Uses dynamic multinomial HMC algorithm in Mici with adaptive warm-up phase. This function follows a similar argument naming scheme to the PyStan - `stan.Model.sample` method (which itself follows CmdStan) to allow using as a - (partial) drop-in replacement. + :py:meth:`stan.model.Model.sample` method (which itself follows CmdStan) to allow + using as a (partial) drop-in replacement. Args: model_code: Stan program code describing a Stan model. diff --git a/src/mici/matrices.py b/src/mici/matrices.py index b06b8bb..b620e87 100644 --- a/src/mici/matrices.py +++ b/src/mici/matrices.py @@ -1931,8 +1931,10 @@ def __init__( `(dim_inner, dim_inner)` specifying inner term in matrix product defining low-rank update. If `None` an identity matrix is used. capacitance_matrix: Square matrix equal to + inner_square_matrix.inv + right_factor_matrix @ square_matrix.inv @ left_factor_matrix + and with shape `(dim_inner, dim_inner)` which is used in constructing inverse and computation of determinant of the low-rank updated matrix, with this argument optional and typically only passed when this matrix @@ -2122,8 +2124,10 @@ def __init__( `(dim_inner, dim_inner)` specifying inner term in matrix product defining low-rank update. If `None` an identity matrix is used. capacitance_matrix: Symmetric matrix equal to + inner_symmetric_matrix.inv + factor_matrix.T @ symmetric_matrix.inv @ factor_matrix + and with shape `(dim_inner, dim_inner)` which is used in constructing inverse and computation of determinant of the low-rank updated matrix, with this argument optional and typically only passed when this matrix @@ -2240,9 +2244,11 @@ def __init__( inner_pos_def_matrix: Optional positive definite matrix with shape `(dim_inner, dim_inner)` specifying inner term in matrix product defining low-rank update. If `None` an identity matrix is used. - capacitance_matrix: Positive- definite matrix equal to + capacitance_matrix: Positive-definite matrix equal to + inner_pos_def_matrix.inv + factor_matrix.T @ pos_def_matrix.inv @ factor_matrix + and with shape `(dim_inner, dim_inner)` which is used in constructing inverse and computation of determinant of the low-rank updated matrix, with this argument optional and typically only passed when this matrix diff --git a/src/mici/progressbars.py b/src/mici/progressbars.py index 017eea2..817c305 100644 --- a/src/mici/progressbars.py +++ b/src/mici/progressbars.py @@ -198,8 +198,8 @@ def __init__( ): """ Args: - sequence: Sequence to iterate over. Must be iterable _and_ have a defined - length such that `len(sequence)` is valid. + sequence: Sequence to iterate over. Must be iterable **and** have a defined + length such that `len(sequence)` is valid. description: Description of task to prefix progress bar with. position: Tuple specifying position of progress bar within a sequence with first entry corresponding to zero-indexed position and the second entry diff --git a/src/mici/solvers.py b/src/mici/solvers.py index 0a0949c..f5c4df2 100644 --- a/src/mici/solvers.py +++ b/src/mici/solvers.py @@ -26,7 +26,15 @@ class FixedPointSolver(Protocol): """Solver for fixed point equation, returning `x` such that `func(x) = x`.""" def __call__(self, func: ArrayFunction, x0: ArrayLike, **kwargs) -> ArrayLike: - ... + """Solve fixed point equation. + + Args: + func: Function to solve for fixed point of. + x0: Point to initialize solver at. + + Returns: + Fixed point solved for. + """ def solve_fixed_point_direct( @@ -53,8 +61,8 @@ def solve_fixed_point_direct( Solution to fixed point equation with `norm(func(x) - x) < convergence_tol`. Raises: - `mici.errors.ConvergenceError` if solver does not converge within `max_iters` - iterations, diverges or encounters a `ValueError` during the iteration. + mici.errors.ConvergenceError: If solver does not converge within `max_iters` + iterations, diverges or encounters a `ValueError` during the iteration. """ for i in range(max_iters): try: @@ -108,8 +116,8 @@ def solve_fixed_point_steffensen( Solution to fixed point equation with `norm(func(x) - x) < convergence_tol`. Raises: - `mici.errors.ConvergenceError` if solver does not converge within `max_iters` - iterations, diverges or encounters a `ValueError` during the iteration. + mici.errors.ConvergenceError: If solver does not converge within `max_iters` + iterations, diverges or encounters a `ValueError` during the iteration. """ for i in range(max_iters): try: @@ -144,6 +152,8 @@ class ProjectionSolver(Protocol): Solves an equation of the form + .. code:: + r(λ) = c(Φ₂,₁(q, p + ∂c(q)ᵀλ)) = 0 for the vector of Lagrange multipliers `λ` to project a point on to the manifold @@ -160,6 +170,17 @@ def __call__( system: System, **kwargs, ) -> ChainState: + """Solve for projection on to manifold step. + + Args: + state: Current chain state after unconstrained step. + state_prev: Previous chain state on manifold. + time_step: Integrator time step for unconstrained step. + system: Hamiltonian system dynamics are being simulated for. + + Returns: + Chain state after projection on to manifold. + """ ... @@ -181,6 +202,8 @@ def solve_projection_onto_manifold_quasi_newton( Solves an equation of the form + .. code:: + r(λ) = c(Φ₂,₁(q, p + ∂c(q)ᵀλ)) = 0 for the vector of Lagrange multipliers `λ` to project a point on to the manifold @@ -190,11 +213,15 @@ def solve_projection_onto_manifold_quasi_newton( The Jacobian of the residual function `r` is + .. code:: + ∂r(λ) = ∂c(Φ₂,₁(q, p + ∂c(q)ᵀλ)) ∂₂Φ₂,₁ ∂c(q)ᵀ where `∂₂Φ₂,₁` is the Jacobian of the (linear) flow-map `Φ₂,₁` with respect to the second (momentum argument), such that the full Newton update is + .. code:: + λ_(α) = λ - ∂r(λ)⁻¹ r(λ) which requires evaluating `∂c` (`system.jacob_constr`) on each iteration and solving @@ -202,10 +229,14 @@ def solve_projection_onto_manifold_quasi_newton( The symmetric quasi-Newton iteration instead uses the approximation + .. code:: + ∂c(Φ₂,₁(q, p + ∂c(q)ᵀλ)) ∂₂Φ₂,₁ ∂c(q)ᵀ ≈ ∂c(q) ∂₂Φ₂,₁ ∂c(q)ᵀ with the corresponding update + .. code:: + λ = λ - (∂c(q) ∂₂Φ₂,₁ ∂c(q)ᵀ)⁻¹ r(λ) allowing a previously computed decomposition of the matrix `∂c(q) ∂₂Φ₂,₁ ∂c(q)ᵀ` to @@ -237,8 +268,8 @@ def solve_projection_onto_manifold_quasi_newton( within `constraint_tol`, i.e. `norm(system.constr(state.pos)) < constraint_tol`. Raises: - `mici.errors.ConvergenceError` if solver does not converge within `max_iters` - iterations, diverges or encounters a `ValueError` during the iteration. + mici.errors.ConvergenceError: If solver does not converge within `max_iters` + iterations, diverges or encounters a `ValueError` during the iteration. """ mu = np.zeros_like(state.pos) jacob_constr_prev = system.jacob_constr(state_prev) @@ -294,6 +325,8 @@ def solve_projection_onto_manifold_newton( Solves an equation of the form + .. code:: + r(λ) = c(Φ₂,₁(q, p + ∂c(q)ᵀλ)) = 0 for the vector of Lagrange multipliers `λ` to project a point on to the manifold @@ -303,11 +336,15 @@ def solve_projection_onto_manifold_newton( The Jacobian of the residual function `r` is + .. code:: + ∂r(λ) = ∂c(Φ₂,₁(q, p + ∂c(q)ᵀλ)) ∂₂Φ₂,₁ ∂c(q)ᵀ where `∂₂Φ₂,₁` is the Jacobian of the (linear) flow-map `Φ₂,₁` with respect to the second (momentum argument), such that the Newton update is + .. code:: + λ = λ - ∂r(λ)⁻¹ r(λ) which requires evaluating `∂c` (`system.jacob_constr`) on each iteration and solving @@ -338,8 +375,8 @@ def solve_projection_onto_manifold_newton( within `constraint_tol`, i.e. `norm(system.constr(state.pos)) < constraint_tol`. Raises: - `mici.errors.ConvergenceError` if solver does not converge within `max_iters` - iterations, diverges or encounters a `ValueError` during the iteration. + mici.errors.ConvergenceError: If solver does not converge within `max_iters` + iterations, diverges or encounters a `ValueError` during the iteration. """ mu = np.zeros_like(state.pos) jacob_constr_prev = system.jacob_constr(state_prev) @@ -399,6 +436,8 @@ def solve_projection_onto_manifold_newton_with_line_search( Solves an equation of the form + .. code:: + r(λ) = c(Φ₂,₁(q, p + ∂c(q)ᵀλ)) = 0 for the vector of Lagrange multipliers `λ` to project a point on to the manifold @@ -408,11 +447,15 @@ def solve_projection_onto_manifold_newton_with_line_search( The Jacobian of the residual function `r` is + .. code:: + ∂r(λ) = ∂c(Φ₂,₁(q, p + ∂c(q)ᵀλ)) ∂₂Φ₂,₁ ∂c(q)ᵀ where `∂₂Φ₂,₁` is the Jacobian of the (linear) flow-map `Φ₂,₁` with respect to the second (momentum argument), such that the scaled Newton update is + .. code:: + λ_(α) = λ - α * ∂r(λ)⁻¹ r(λ) which requires evaluating `∂c` (`system.jacob_constr`) on each iteration and solving @@ -449,8 +492,8 @@ def solve_projection_onto_manifold_newton_with_line_search( within `constraint_tol`, i.e. `norm(system.constr(state.pos)) < constraint_tol`. Raises: - `mici.errors.ConvergenceError` if solver does not converge within `max_iters` - iterations, diverges or encounters a `ValueError` during the iteration. + mici.errors.ConvergenceError: If solver does not converge within `max_iters` + iterations, diverges or encounters a `ValueError` during the iteration. """ mu = np.zeros_like(state.pos) jacob_constr_prev = system.jacob_constr(state_prev) diff --git a/src/mici/stagers.py b/src/mici/stagers.py index bdef429..3f1dfb4 100644 --- a/src/mici/stagers.py +++ b/src/mici/stagers.py @@ -10,22 +10,23 @@ from numpy.typing import ArrayLike from mici.adapters import Adapter from mici.states import ChainState + from mici.types import TraceFunction class ChainStage(NamedTuple): - """Parameters of chain stage.""" + """Parameters of chain stage. + + Parameters: + n_iter: Number of iterations in chain stage. + adapters: Dictionary of adapters to apply to each transition in chain stage. + trace_funcs: Functions defining chain variables to trace during chain stage. + record_stats: Whether to record statistics and traces during chain stage. + """ n_iter: int - """Number of iterations in chain stage.""" - adapters: dict[str, Iterable[Adapter]] - """Dictionary of adapters to apply to each transition in chain stage.""" - - trace_funcs: Iterable[Callable[[ChainState], dict[str, ArrayLike]]] - """""" - + trace_funcs: tuple[TraceFunction] record_stats: bool - """Whether to record statistics and traces during chain stage.""" class Stager(abc.ABC): @@ -37,7 +38,7 @@ def stages( n_warm_up_iter: int, n_main_iter: int, adapters: dict[str, Iterable[Adapter]], - trace_funcs: Iterable[Callable[[ChainState], dict[str, ArrayLike]]], + trace_funcs: Iterable[TraceFunction], trace_warm_up: bool = False, ) -> dict[str, ChainStage]: """Create dictionary specifying labels and parameters of chain sampling stages. @@ -106,7 +107,7 @@ def stages( sampling_stages["Adaptive warm up"] = ChainStage( n_iter=n_warm_up_iter, adapters=adapters, - trace_funcs=warm_up_trace_funcs, + trace_funcs=tuple(warm_up_trace_funcs), record_stats=trace_warm_up, ) # main non-adaptive stage @@ -114,7 +115,7 @@ def stages( sampling_stages["Main non-adaptive"] = ChainStage( n_iter=n_main_iter, adapters=None, - trace_funcs=trace_funcs, + trace_funcs=tuple(trace_funcs), record_stats=True, ) return sampling_stages @@ -191,7 +192,7 @@ def stages( n_warm_up_iter: int, n_main_iter: int, adapters: dict[str, Iterable[Adapter]], - trace_funcs: Iterable[Callable[[ChainState], dict[str, ArrayLike]]], + trace_funcs: Iterable[TraceFunction], trace_warm_up: bool = False, ) -> dict[str, ChainStage]: fast_adapters = { @@ -221,7 +222,7 @@ def stages( sampling_stages["Initial fast adaptive"] = ChainStage( n_iter=n_init_fast_stage_iter, adapters=fast_adapters, - trace_funcs=warm_up_trace_funcs, + trace_funcs=tuple(warm_up_trace_funcs), record_stats=record_stats, ) # growing size slow adaptation windows @@ -250,14 +251,14 @@ def stages( ] = ChainStage( n_iter=n_iter, adapters=adapters, - trace_funcs=warm_up_trace_funcs, + trace_funcs=tuple(warm_up_trace_funcs), record_stats=record_stats, ) # final fast adaptation stage sampling_stages["Final fast adaptive"] = ChainStage( n_iter=n_final_fast_stage_iter, adapters=fast_adapters, - trace_funcs=warm_up_trace_funcs, + trace_funcs=tuple(warm_up_trace_funcs), record_stats=record_stats, ) # main non-adaptive stage @@ -265,7 +266,7 @@ def stages( sampling_stages["Main non-adaptive"] = ChainStage( n_iter=n_main_iter, adapters=None, - trace_funcs=trace_funcs, + trace_funcs=tuple(trace_funcs), record_stats=True, ) return sampling_stages diff --git a/src/mici/systems.py b/src/mici/systems.py index 8933667..2a51635 100644 --- a/src/mici/systems.py +++ b/src/mici/systems.py @@ -532,6 +532,8 @@ def __init__( the ratio of the prior density (specified by `neg_log_dens`) and the square-root of the determinant of the Gram matrix defined by + .. code-block:: + gram(q) = jacob_constr(q) @ inv(metric) @ jacob_constr(q).T where `jacob_constr` is the Jacobian of the constraint function `constr` @@ -574,6 +576,8 @@ def __init__( constraint function `c = constr(q)` with respect to the position array argument `q`, returning the computed Jacobian as a 2D array `jacob` with + .. code-block:: + jacob[i, j] = ∂c[i] / ∂q[j] Optionally the function may instead return a 2-tuple of values with the @@ -649,6 +653,8 @@ def gram(self, state: ChainState) -> matrices.PositiveDefiniteMatrix: The Gram matrix as a position `q` is defined as + .. code-block:: + gram(q) = jacob_constr(q) @ inv(metric) @ jacob_constr(q).T where `jacob_constr` is the Jacobian of the constraint function `constr` and @@ -763,6 +769,8 @@ def __init__( with respect to the Hausdorff measure on the manifold corresponding to the ratio of the prior density (specified by `neg_log_dens`) and the square-root of the determinant of the Gram matrix defined by + + .. code-block:: gram(q) = jacob_constr(q) @ inv(metric) @ jacob_constr(q).T @@ -805,6 +813,8 @@ def __init__( (matrix / 2D array of partial derivatives) of the output of the constraint function `c = constr(q)` with respect to the position array argument `q`, returning the computed Jacobian as a 2D array `jacob` with + + .. code-block:: jacob[i, j] = ∂c[i] / ∂q[j] @@ -819,12 +829,16 @@ def __init__( *matrix-Hessian-product* (MHP) of the constraint function `constr` with respect to the position array argument. The MHP is here defined as a function of a `(dim_constr, dim_pos)` shaped 2D array `m` + + .. code-block:: mhp(m) = sum(m[:, :, None] * hess[:, :, :], axis=(0, 1)) where `hess` is the `(dim_constr, dim_pos, dim_pos)` shaped vector-Hessian of `c = constr(q)` with respect to `q` i.e. the array of second-order partial derivatives of such that + + .. code-block:: hess[i, j, k] = ∂²c[i] / (∂q[j] ∂q[k]) @@ -911,6 +925,8 @@ def __init__( manifold corresponding to the ratio of the prior density (specified by `neg_log_dens`) and the square-root of the determinant of the Gram matrix defined by + + .. code-block:: gram(q) = jacob_constr(q) @ inv(metric) @ jacob_constr(q).T @@ -943,6 +959,8 @@ def __init__( constraint function `c = constr(q)` with respect to the position array argument `q`, returning the computed Jacobian as a 2D array `jacob` with + .. code-block:: + jacob[i, j] = ∂c[i] / ∂q[j] Optionally the function may instead return a 2-tuple of values with the @@ -956,6 +974,8 @@ def __init__( *matrix-Hessian-product* (MHP) of the constraint function `constr` with respect to the position array argument. The MHP is here defined as a function of a `(dim_constr, dim_pos)` shaped 2D array `m` + + .. code-block:: mhp(m) = sum(m[:, :, None] * hess[:, :, :], axis=(0, 1)) @@ -963,6 +983,8 @@ def __init__( vector-Hessian of `c = constr(q)` with respect to `q` i.e. the array of second-order partial derivatives of such that + .. code-block:: + hess[i, j, k] = ∂²c[i] / (∂q[j] ∂q[k]) Optionally the function may instead return a 3-tuple of values with the @@ -1068,6 +1090,8 @@ def __init__( argument used to specify the value of these, if any. Together this means the metric matrix representation at a position `pos` is constructed as + .. code-block:: + metric = metric_matrix_class(metric_func(pos), **metric_kwargs) The `mici.matrices.PositiveDefiniteMatrix` subclass should as a minimum @@ -1082,16 +1106,20 @@ def __init__( initializer. vjp_metric_func: Function which given a position array returns another function which takes an array as an argument and returns the - *vector-Jacobian-product* (VJP) of `metric_func` with respect to the + **vector-Jacobian-product** (VJP) of `metric_func` with respect to the position array argument. The VJP is here defined as a function of an array `v` (of the same shape as the output of `metric_func`) corresponding to + .. code-block:: + vjp(v) = sum(v[..., None] * jacob, tuple(range(v.ndim)) where `jacob` is the Jacobian of `m = metric_func(q)` wrt `q` i.e. the array of partial derivatives of the function such that + .. code-block:: + jacob[..., i] = ∂m[...] / ∂q[i] Optionally the function may instead return a 2-tuple of values with the @@ -1140,11 +1168,15 @@ def vjp_metric_func(self, state: ChainState) -> VectorJacobianProduct: The vector-Jacobian-product is here defined as a function of an array `v` (of the same shape as the output of `metric_func`) corresponding to + .. code-block:: + vjp(v) = sum(v[..., None] * jacob, axis=tuple(range(v.ndim)) where `jacob` is the Jacobian of `m = metric_func(q)` wrt `q` i.e. the array of partial derivatives of the function such that + .. code-block:: + jacob[..., i] = ∂m[...] / ∂q[i] Args: @@ -1237,12 +1269,16 @@ def __init__( the position array argument. The VJP is here defined as a function of a scalar `v` + .. code-block:: + vjp(v) = v @ grad where `grad` is the `(dim_pos,)` shaped Jacobian (gradient) of `s = metric_scalar_func(q)` with respect to `q` i.e. the array of partial derivatives of the function such that + .. code-block:: + grad[i] = ∂s / ∂q[i] Optionally the function may instead return a 2-tuple of values with the @@ -1313,12 +1349,16 @@ def __init__( to the position array argument. The VJP is here defined as a function of a 1D array `v` + .. code-block:: + vjp(v) = sum(v[:, None] * jacob[:, :], axis=0) where `jacob` is the `(dim_pos, dim_pos)` shaped Jacobian of `d = metric_diagonal_func(q)` with respect to `q` i.e. the array of partial derivatives of the function such that + .. code-block:: + jacob[i, j] = ∂d[i] / ∂q[j] Optionally the function may instead return a 2-tuple of values with the @@ -1382,12 +1422,16 @@ def __init__( the position array argument. The VJP is here defined as a function of a 2D array `v` + .. code-block:: + vjp(v) = sum(v[:, :, None] * jacob[:, :, :], axis=(0, 1)) where `jacob` is the `(dim_pos, dim_pos, dim_pos)` shaped Jacobian of `L = metric_chol_func(q)` with respect to `q` i.e. the array of partial derivatives of the function such that + .. code-block:: + jacob[i, j, k] = ∂L[i, j] / ∂q[k] Optionally the function may instead return a 2-tuple of values with the @@ -1452,12 +1496,16 @@ def __init__( *vector-Jacobian-product* (VJP) of `metric_func` with respect to the position array argument. The VJP is here defined as a function of a 2D array `v` + + .. code-block:: vjp(v) = sum(v[:, :, None] * jacob[:, :, :], axis=(0, 1)) where `jacob` is the `(dim_pos, dim_pos, dim_pos)` shaped Jacobian of `M = metric_func(q)` with respect to `q` i.e. the array of partial derivatives of the function such that + + .. code-block:: jacob[i, j, k] = ∂M[i, j] / ∂q[k] @@ -1503,6 +1551,8 @@ class SoftAbsRiemannianMetricSystem(RiemannianMetricSystem): 1D array of real eigenvalues, and `eigvec` the corresponding 2D array (orthogonal matrix) with eigenvectors as columns, then the resulting positive-definite metric matrix representation `M` is computed as + + .. code-block:: M = eigvec @ diag(softabs(eigval, softabs_coeff)) @ eigvec.T @@ -1560,12 +1610,16 @@ def __init__( *matrix-Tressian-product* (MTP) of `neg_log_dens` with respect to the position array argument. The MTP is here defined as a function of a matrix `m` corresponding to + + .. code-block:: mtp(m) = sum(m[:, :, None] * tress[:, :, :], axis=(0, 1)) where `tress` is the 'Tressian' of `f = neg_log_dens(q)` wrt `q` i.e. the 3D array of third-order partial derivatives of the scalar-valued function such that + + .. code-block:: tress[i, j, k] = ∂³f / (∂q[i] ∂q[j] ∂q[k]) @@ -1628,12 +1682,16 @@ def mtp_neg_log_dens(self, state: ChainState) -> MatrixTressianProduct: The matrix-Tressian-product (MTP) is here defined as a function of a matrix `m` corresponding to + + .. code-block:: mtp(m) = sum(m[:, :, None] * tress[:, :, :], axis=(0, 1)) where `tress` is the 'Tressian' of `f = neg_log_dens(q)` with respect to `q = state.pos` i.e. the 3D array of third-order partial derivatives of the scalar-valued function such that + + .. code-block:: tress[i, j, k] = ∂³f / (∂q[i] ∂q[j] ∂q[k])