From e0d23e134d16d03cbbe57a23187594b812a369c7 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Sat, 11 Nov 2023 00:14:32 +0000 Subject: [PATCH 1/9] feat: add new behavior protocol --- src/dask_awkward/__init__.py | 2 + src/dask_awkward/lib/core.py | 169 ++++++++++++++++------------------- tests/test_behavior.py | 23 +++-- 3 files changed, 95 insertions(+), 99 deletions(-) diff --git a/src/dask_awkward/__init__.py b/src/dask_awkward/__init__.py index 0bcbcf52..a9726941 100644 --- a/src/dask_awkward/__init__.py +++ b/src/dask_awkward/__init__.py @@ -13,6 +13,8 @@ from dask_awkward.lib.core import _type as type from dask_awkward.lib.core import ( compatible_partitions, + dask_method, + dask_property, map_partitions, partition_compatibility, ) diff --git a/src/dask_awkward/lib/core.py b/src/dask_awkward/lib/core.py index a7a6458f..11eaa5ee 100644 --- a/src/dask_awkward/lib/core.py +++ b/src/dask_awkward/lib/core.py @@ -1,6 +1,5 @@ from __future__ import annotations -import inspect import keyword import logging import math @@ -10,6 +9,7 @@ from collections.abc import Callable, Hashable, Sequence from enum import IntEnum from functools import cached_property, partial, wraps +from inspect import getattr_static from numbers import Number from typing import TYPE_CHECKING, Any, Literal, TypeVar, Union, overload @@ -68,6 +68,56 @@ log = logging.getLogger(__name__) +def make_dask_descriptor(func: Callable) -> Callable[[T, type[T], Array], Any]: + def descriptor(instance: T, owner: type[T], dask_array: Array) -> Any: + impl = func.__get__(instance, owner) + return impl(dask_array) + + return descriptor + + +def make_dask_method(func: Callable) -> Callable[[T, type[T], Array], Callable]: + def descriptor(instance: T, owner: type[T], dask_array: Array) -> Any: + def impl(*args, **kwargs): + impl = func.__get__(instance, owner) + return impl(dask_array, *args, **kwargs) + + return impl + + return descriptor + + +F = TypeVar("F", bound=Callable) +G = TypeVar("G", bound=Callable) + + +class dask_property(property): + _dask_get: Callable | None = None + _dask_set: Callable | None = None + _dask_del: Callable | None = None + + def dask_getter(self, func: F) -> F: + self._dask_get = make_dask_descriptor(func) + return func + + def dask_setter(self, func: F) -> F: + self._dask_set = make_dask_descriptor(func) + return func + + def dask_deleter(self, func: F) -> F: + self._dask_del = make_dask_descriptor(func) + return func + + +def dask_method(func: F) -> F: + def dask(dask_func_impl: G) -> G: + func._dask_get = make_dask_method(dask_func_impl) # type: ignore + return dask_func_impl + + func.dask = dask # type: ignore + return func + + class Scalar(DaskMethodsMixin, DaskOperatorMethodMixin): """Single partition Dask collection representing a lazy Scalar. @@ -1212,101 +1262,34 @@ def __getitem__(self, where): return self._getitem_single(where) - def _call_behavior_method(self, method_name: str, *args: Any, **kwargs: Any) -> Any: - """Call a behavior method for an awkward array. - If the function signature has __dunder__ parameters it is assumed that the - user wants to do the map_partitions dispatch themselves and the _meta's - behavior is called. - If there are no __dunder__ parameters in the function call then the function - is wrapped in map_partitions automatically. - """ - if hasattr(self._meta, method_name): - themethod = getattr(self._meta, method_name) - thesig = inspect.signature(themethod) - if "_dask_array_" in thesig.parameters: - if "_dask_array_" not in kwargs: - kwargs["_dask_array_"] = self - return themethod(*args, **kwargs) - return self.map_partitions( - _BehaviorMethodFn(method_name, **kwargs), - *args, - label=hyphenize(method_name), - ) - - raise AttributeError( - f"Method {method_name} is not available to this collection." - ) - - def _call_behavior_property(self, property_name: str) -> Any: - """Call a property for an awkward array. - This also allows for some internal state to be tracked via behaviors - if a user follows the pattern: - - class SomeMixin: - - @property - def the_property(self): - ... - - @property - def a_property(array_context=None) # note: this can be any name - - This pattern is caught if the property has an argument that single - argument is assumed to be the array context (i.e. self) so that self- - referenced re-indexing operations can be hidden in properties. The - user must do the appropriate dispatch of map_partitions. - - If there is no argument the property call is wrapped in map_partitions. - """ - if hasattr(self._meta.__class__, property_name): - thegetter = getattr(self._meta.__class__, property_name).fget.__get__( - self._meta - ) - thesig = inspect.signature(thegetter) - - if len(thesig.parameters) == 1: - binding = thesig.bind(self) - return thegetter(*binding.args, **binding.kwargs) - elif len(thesig.parameters) > 1: - raise RuntimeError( - "Parametrized property cannot have more than one argument, the array context!" - ) - return self.map_partitions( - _BehaviorPropertyFn(property_name), - label=hyphenize(property_name), - ) - raise AttributeError( - f"Property {property_name} is not available to this collection." - ) - - def _maybe_behavior_method(self, attr: str) -> bool: - try: - res = getattr(self._meta.__class__, attr) - return (not isinstance(res, property)) and callable(res) - except AttributeError: - return False - - def _maybe_behavior_property(self, attr: str) -> bool: - try: - res = getattr(self._meta.__class__, attr) - return isinstance(res, property) - except AttributeError: - return False + def _is_method_heuristic(self, resolved: Any) -> bool: + return callable(resolved) def __getattr__(self, attr: str) -> Any: if attr not in (self.fields or []): - # check for possible behavior method - if self._maybe_behavior_method(attr): - - def wrapper(*args, **kwargs): - return self._call_behavior_method(attr, *args, **kwargs) - - return wrapper - # check for possible behavior property - elif self._maybe_behavior_property(attr): - return self._call_behavior_property(attr) - - raise AttributeError(f"{attr} not in fields.") + try: + cls_method = getattr_static(self._meta, attr) + except AttributeError: + raise AttributeError(f"{attr} not in fields.") + else: + if hasattr(cls_method, "_dask_get"): + return cls_method._dask_get(self._meta, type(self._meta), self) + elif self._is_method_heuristic(cls_method): + + @wraps(cls_method) + def wrapper(*args, **kwargs): + return self.map_partitions( + _BehaviorMethodFn(attr, **kwargs), + *args, + label=hyphenize(attr), + ) + + return wrapper + else: + return self.map_partitions( + _BehaviorPropertyFn(attr), + label=hyphenize(attr), + ) try: # at this point attr is either a field or we'll have to # raise an exception. diff --git a/tests/test_behavior.py b/tests/test_behavior.py index 67d4f78a..7e0c0b6b 100644 --- a/tests/test_behavior.py +++ b/tests/test_behavior.py @@ -23,12 +23,21 @@ def x2(self): def point_abs(self): return np.sqrt(self.x**2 + self.y**2) - @property - def non_dask_property(self, _dask_array_=None): + @dak.dask_property + def some_property(self): return "this is a non-dask property" - def non_dask_method(self, _dask_array_=None): - return _dask_array_ + @some_property.dask_getter + def some_property_dask(self, array): + return f"this is a dask property ({type(array).__name__})" + + @dak.dask_method + def some_method(self): + return None + + @some_method.dask # type: ignore + def some_method_dask(self, array): + return array @pytest.mark.xfail( @@ -60,9 +69,11 @@ def test_property_behavior(daa_p1: dak.Array, caa_p1: ak.Array) -> None: assert daa.behavior == caa.behavior - assert daa.non_dask_property == caa.non_dask_property + assert caa.some_property == "this is a non-dask property" + assert daa.some_property == "this is a dask property (Array)" - assert repr(daa.non_dask_method()) == repr(daa) + assert repr(daa.some_method()) == repr(daa) + assert repr(caa.some_method()) == repr(None) @pytest.mark.xfail( From b54f6c5edc666eddb5e4969fa6500108f0254cbd Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Mon, 13 Nov 2023 17:42:44 +0000 Subject: [PATCH 2/9] refactor: only support `dask` --- src/dask_awkward/lib/core.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/dask_awkward/lib/core.py b/src/dask_awkward/lib/core.py index 11eaa5ee..806a22dd 100644 --- a/src/dask_awkward/lib/core.py +++ b/src/dask_awkward/lib/core.py @@ -93,21 +93,11 @@ def impl(*args, **kwargs): class dask_property(property): _dask_get: Callable | None = None - _dask_set: Callable | None = None - _dask_del: Callable | None = None - def dask_getter(self, func: F) -> F: + def dask(self, func: F) -> F: self._dask_get = make_dask_descriptor(func) return func - def dask_setter(self, func: F) -> F: - self._dask_set = make_dask_descriptor(func) - return func - - def dask_deleter(self, func: F) -> F: - self._dask_del = make_dask_descriptor(func) - return func - def dask_method(func: F) -> F: def dask(dask_func_impl: G) -> G: From 764809ba370725f2923b1279cb1e993f3a0f8396 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Mon, 13 Nov 2023 17:44:05 +0000 Subject: [PATCH 3/9] test: drop uneeded test --- tests/test_behavior.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/tests/test_behavior.py b/tests/test_behavior.py index 7e0c0b6b..8b299b7d 100644 --- a/tests/test_behavior.py +++ b/tests/test_behavior.py @@ -35,7 +35,7 @@ def some_property_dask(self, array): def some_method(self): return None - @some_method.dask # type: ignore + @some_method.dask def some_method_dask(self, array): return array @@ -84,18 +84,6 @@ def test_nonexistent_behavior(daa_p1: dak.Array, daa_p2: dak.Array) -> None: daa1 = dak.with_name(daa_p1["points"], "Point", behavior=behaviors) daa2 = daa_p2 - with pytest.raises( - AttributeError, - match="Method doesnotexist is not available to this collection", - ): - daa1._call_behavior_method("doesnotexist", daa2) - - with pytest.raises( - AttributeError, - match="Property doesnotexist is not available to this collection", - ): - daa1._call_behavior_property("doesnotexist") - # in this case the field check is where we raise with pytest.raises(AttributeError, match="distance not in fields"): daa2.distance(daa1) From 492a126406cb525c1ba65627be50166080f6c16b Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Mon, 13 Nov 2023 17:50:30 +0000 Subject: [PATCH 4/9] test: fix changed registrar --- tests/test_behavior.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_behavior.py b/tests/test_behavior.py index 8b299b7d..7395e808 100644 --- a/tests/test_behavior.py +++ b/tests/test_behavior.py @@ -27,7 +27,7 @@ def point_abs(self): def some_property(self): return "this is a non-dask property" - @some_property.dask_getter + @some_property.dask def some_property_dask(self, array): return f"this is a dask property ({type(array).__name__})" From 7debfa22d9572f7731f771f09b0df3115080e0ef Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 16 Nov 2023 22:18:00 +0000 Subject: [PATCH 5/9] chore: appease mypy (for now) --- tests/test_behavior.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_behavior.py b/tests/test_behavior.py index 7395e808..a2858489 100644 --- a/tests/test_behavior.py +++ b/tests/test_behavior.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import no_type_check + import awkward as ak import numpy as np import pytest @@ -35,6 +37,7 @@ def some_property_dask(self, array): def some_method(self): return None + @no_type_check @some_method.dask def some_method_dask(self, array): return array From bf05247546d3477328e5e1cfdecaed5cb972d849 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 22 Nov 2023 11:29:23 +0000 Subject: [PATCH 6/9] fix: always return original descriptor --- src/dask_awkward/lib/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dask_awkward/lib/core.py b/src/dask_awkward/lib/core.py index 101e52ae..a01c3a09 100644 --- a/src/dask_awkward/lib/core.py +++ b/src/dask_awkward/lib/core.py @@ -95,15 +95,15 @@ def impl(*args, **kwargs): class dask_property(property): _dask_get: Callable | None = None - def dask(self, func: F) -> F: + def dask(self, func: F) -> dask_property: self._dask_get = make_dask_descriptor(func) - return func + return self def dask_method(func: F) -> F: - def dask(dask_func_impl: G) -> G: + def dask(dask_func_impl: G) -> F: func._dask_get = make_dask_method(dask_func_impl) # type: ignore - return dask_func_impl + return func func.dask = dask # type: ignore return func From fd0671c89ca2aae4526cd8ade13a413e9ad1d439 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 28 Nov 2023 01:41:08 +0000 Subject: [PATCH 7/9] feat: support sharing impl for properties --- src/dask_awkward/lib/core.py | 39 ++++++++++++++++++++++++++++++++++-- tests/test_behavior.py | 10 +++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/src/dask_awkward/lib/core.py b/src/dask_awkward/lib/core.py index a01c3a09..d7417b65 100644 --- a/src/dask_awkward/lib/core.py +++ b/src/dask_awkward/lib/core.py @@ -92,14 +92,49 @@ def impl(*args, **kwargs): G = TypeVar("G", bound=Callable) -class dask_property(property): +class _DaskProperty(property): _dask_get: Callable | None = None - def dask(self, func: F) -> dask_property: + def dask(self, func: F) -> _DaskProperty: + assert self._dask_get is None self._dask_get = make_dask_descriptor(func) return self +def _adapt_dask_get(func: Callable) -> Callable: + def wrapper(self, dask_array, *args, **kwargs): + return func(self, *args, **kwargs) + + return wrapper + + +@overload +def dask_property(maybe_func: Callable) -> _DaskProperty: + ... + + +@overload +def dask_property( + maybe_func: None = None, *, no_dispatch: bool = False +) -> Callable[[Callable], _DaskProperty]: + ... + + +def dask_property(maybe_func=None, *, no_dispatch=False): + if maybe_func is None: + + def dask_property_wrapper(func: Callable) -> _DaskProperty: + prop = _DaskProperty(func) + if no_dispatch: + return prop.dask(_adapt_dask_get(func)) + else: + return prop + + return dask_property_wrapper + else: + return _DaskProperty(maybe_func) + + def dask_method(func: F) -> F: def dask(dask_func_impl: G) -> F: func._dask_get = make_dask_method(dask_func_impl) # type: ignore diff --git a/tests/test_behavior.py b/tests/test_behavior.py index a2858489..8000a4a6 100644 --- a/tests/test_behavior.py +++ b/tests/test_behavior.py @@ -33,6 +33,10 @@ def some_property(self): def some_property_dask(self, array): return f"this is a dask property ({type(array).__name__})" + @dak.dask_property(no_dispatch=True) + def some_property_both(self): + return "this is a dask AND non-dask property" + @dak.dask_method def some_method(self): return None @@ -78,6 +82,12 @@ def test_property_behavior(daa_p1: dak.Array, caa_p1: ak.Array) -> None: assert repr(daa.some_method()) == repr(daa) assert repr(caa.some_method()) == repr(None) + assert ( + daa.some_property_both + == caa.some_property_both + == "this is a dask AND non-dask property" + ) + @pytest.mark.xfail( BAD_NP_AK_MIXIN_VERSIONING, From c1aac1cd39d00b646b4d8c086b6a836db783c58c Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 29 Nov 2023 20:46:32 +0000 Subject: [PATCH 8/9] docs: add documentation --- docs/api/behavior.rst | 23 ++++++ docs/index.rst | 1 + src/dask_awkward/lib/core.py | 133 +++++++++++++++++++++++++++++++---- 3 files changed, 142 insertions(+), 15 deletions(-) create mode 100644 docs/api/behavior.rst diff --git a/docs/api/behavior.rst b/docs/api/behavior.rst new file mode 100644 index 00000000..a239af61 --- /dev/null +++ b/docs/api/behavior.rst @@ -0,0 +1,23 @@ +Behavior +-------- + +Utilities to implement array behaviors for dask-awkward arrays. + + +.. currentmodule:: dask_awkward + + +.. autosummary:: + :toctree: generated/ + + dask_property + +.. autosummary:: + :toctree: generated/ + + dask_method + +.. raw:: html + + diff --git a/docs/index.rst b/docs/index.rst index 2626eb37..9bb01088 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -49,6 +49,7 @@ Table of Contents api/io.rst api/reducers.rst api/structure.rst + api/behavior.rst .. toctree:: :maxdepth: 1 diff --git a/src/dask_awkward/lib/core.py b/src/dask_awkward/lib/core.py index d7417b65..819d7287 100644 --- a/src/dask_awkward/lib/core.py +++ b/src/dask_awkward/lib/core.py @@ -69,7 +69,20 @@ log = logging.getLogger(__name__) -def make_dask_descriptor(func: Callable) -> Callable[[T, type[T], Array], Any]: +def _make_dask_descriptor(func: Callable) -> Callable[[T, type[T], Array], Any]: + """Adapt a function accepting a `dask_array` into a dask-awkward descriptor + that invokes and returns the user function when invoked. + + Parameters + ---------- + func : Callable dask-awkward descriptor body + + Returns + ------- + Callable + The callable dask-awkward descriptor + """ + def descriptor(instance: T, owner: type[T], dask_array: Array) -> Any: impl = func.__get__(instance, owner) return impl(dask_array) @@ -77,7 +90,21 @@ def descriptor(instance: T, owner: type[T], dask_array: Array) -> Any: return descriptor -def make_dask_method(func: Callable) -> Callable[[T, type[T], Array], Callable]: +def _make_dask_method(func: Callable) -> Callable[[T, type[T], Array], Callable]: + """Adapt a function accepting a `dask_array` and additional arguments into + a dask-awkward descriptor that invokes and returns the bound user function. + + Parameters + ---------- + func : Callable + The dask-awkward descriptor body. + + Returns + ------- + Callable + The callable dask-awkward descriptor. + """ + def descriptor(instance: T, owner: type[T], dask_array: Array) -> Any: def impl(*args, **kwargs): impl = func.__get__(instance, owner) @@ -93,15 +120,33 @@ def impl(*args, **kwargs): class _DaskProperty(property): + """A property descriptor that exposes a `.dask` method for registering + dask-awkward descriptor implementations. + """ + _dask_get: Callable | None = None def dask(self, func: F) -> _DaskProperty: assert self._dask_get is None - self._dask_get = make_dask_descriptor(func) + self._dask_get = _make_dask_descriptor(func) return self -def _adapt_dask_get(func: Callable) -> Callable: +def _adapt_naive_dask_get(func: Callable) -> Callable: + """Adapt a non-dask-awkward user-defined descriptor function into + a dask-awkward aware descriptor that invokes the original function. + + Parameters + ---------- + func : Callable + The non-dask-awkward descriptor body. + + Returns + ------- + Callable + The callable dask-awkward aware descriptor body. + """ + def wrapper(self, dask_array, *args, **kwargs): return func(self, *args, **kwargs) @@ -109,35 +154,93 @@ def wrapper(self, dask_array, *args, **kwargs): @overload -def dask_property(maybe_func: Callable) -> _DaskProperty: - ... +def dask_property(maybe_func: Callable, *, no_dispatch: bool = False) -> _DaskProperty: + """An extension of Python's built-in `property` that supports registration + of a dask getter via `.dask`. + + Parameters + ---------- + maybe_func : Callable, optional + The property getter function. + no_dispatch : bool + If True, re-use the main getter function as the Dask implementation. + + Returns + ------- + Callable + The callable dask-awkward aware descriptor factory or the descriptor itself + """ @overload def dask_property( maybe_func: None = None, *, no_dispatch: bool = False ) -> Callable[[Callable], _DaskProperty]: + """An extension of Python's built-in `property` that supports registration + of a dask getter via `.dask`. + + Parameters + ---------- + maybe_func : Callable, optional + The property getter function. + no_dispatch : bool + If True, re-use the main getter function as the Dask implementation. + + Returns + ------- + Callable + The callable dask-awkward aware descriptor factory or the descriptor itself + """ ... def dask_property(maybe_func=None, *, no_dispatch=False): - if maybe_func is None: + """An extension of Python's built-in `property` that supports registration + of a dask getter via `.dask`. - def dask_property_wrapper(func: Callable) -> _DaskProperty: - prop = _DaskProperty(func) - if no_dispatch: - return prop.dask(_adapt_dask_get(func)) - else: - return prop + Parameters + ---------- + maybe_func : Callable, optional + The property getter function. + no_dispatch : bool + If True, re-use the main getter function as the Dask implementation + Returns + ------- + Callable + The callable dask-awkward aware descriptor factory or the descriptor itself + """ + + def dask_property_wrapper(func: Callable) -> _DaskProperty: + prop = _DaskProperty(func) + if no_dispatch: + return prop.dask(_adapt_naive_dask_get(func)) + else: + return prop + + if maybe_func is None: return dask_property_wrapper else: - return _DaskProperty(maybe_func) + return dask_property_wrapper(maybe_func) def dask_method(func: F) -> F: + """Decorate an instance method to provide a mechanism for overriding the + implementation for dask-awkward arrays. + + Parameters + ---------- + func : Callable + The method implementation to decorate. + + Returns + ------- + Callable + The callable dask-awkward aware method. + """ + def dask(dask_func_impl: G) -> F: - func._dask_get = make_dask_method(dask_func_impl) # type: ignore + func._dask_get = _make_dask_method(dask_func_impl) # type: ignore return func func.dask = dask # type: ignore From bb463d86b571308252c0c9c9cf0d8605aefdc7a0 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 29 Nov 2023 20:47:14 +0000 Subject: [PATCH 9/9] docs: mention `.dask` --- src/dask_awkward/lib/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dask_awkward/lib/core.py b/src/dask_awkward/lib/core.py index 819d7287..8b871690 100644 --- a/src/dask_awkward/lib/core.py +++ b/src/dask_awkward/lib/core.py @@ -226,7 +226,7 @@ def dask_property_wrapper(func: Callable) -> _DaskProperty: def dask_method(func: F) -> F: """Decorate an instance method to provide a mechanism for overriding the - implementation for dask-awkward arrays. + implementation for dask-awkward arrays via `.dask`. Parameters ----------