From d8e72575f01503b6135d638d19fc439a656aefb8 Mon Sep 17 00:00:00 2001 From: Doug Davis Date: Wed, 21 Jun 2023 12:28:03 -0500 Subject: [PATCH 1/7] add __awkward_function__ method --- src/dask_awkward/lib/core.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/dask_awkward/lib/core.py b/src/dask_awkward/lib/core.py index 001fefd5..e77359b1 100644 --- a/src/dask_awkward/lib/core.py +++ b/src/dask_awkward/lib/core.py @@ -1197,6 +1197,17 @@ def clear_divisions(self) -> None: """Clear the divisions of a Dask Awkward Collection.""" self._divisions = (None,) * (self.npartitions + 1) + def __awkward_function__(self, func, args, kwargs): + import dask_awkward + + if any(isinstance(arg, ak.Array) for arg in args): + raise TypeError("cannot mix awkward.Array and dask_awkward.Array") + + fn_name = func.__qualname__ + fn = getattr(dask_awkward, fn_name) + res = fn(*args, **kwargs) + return res + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if method != "__call__": raise RuntimeError("Array ufunc supports only method == '__call__'") From c7b6bc08a9f5dcf1fe20fd3cd908b120790a21aa Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 22 Jun 2023 16:00:17 +0100 Subject: [PATCH 2/7] feat: update for latest iteration of awkward PR --- src/dask_awkward/lib/core.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/dask_awkward/lib/core.py b/src/dask_awkward/lib/core.py index e77359b1..8312e13b 100644 --- a/src/dask_awkward/lib/core.py +++ b/src/dask_awkward/lib/core.py @@ -1197,16 +1197,18 @@ def clear_divisions(self) -> None: """Clear the divisions of a Dask Awkward Collection.""" self._divisions = (None,) * (self.npartitions + 1) - def __awkward_function__(self, func, args, kwargs): + def __awkward_function__(self, func, array_likes, args, kwargs): import dask_awkward - if any(isinstance(arg, ak.Array) for arg in args): + if any(isinstance(arg, ak.Array) for arg in array_likes): raise TypeError("cannot mix awkward.Array and dask_awkward.Array") fn_name = func.__qualname__ - fn = getattr(dask_awkward, fn_name) - res = fn(*args, **kwargs) - return res + try: + fn = getattr(dask_awkward, fn_name) + except AttributeError: + return NotImplemented + return fn(*args, **kwargs) def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if method != "__call__": From add0ca13987c55f939e6219031d3da0689db6dca Mon Sep 17 00:00:00 2001 From: Doug Davis Date: Thu, 22 Jun 2023 12:55:15 -0500 Subject: [PATCH 3/7] remove passing xfail (#296) --- tests/test_structure.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_structure.py b/tests/test_structure.py index 5be93d9a..48d7acba 100644 --- a/tests/test_structure.py +++ b/tests/test_structure.py @@ -468,7 +468,6 @@ def test_from_regular(caa): ) -@pytest.mark.xfail(reason="typetracer") def test_to_regular(caa): regular = ak.to_packed(caa[[0, 4, 5, 9, 10, 14]].points.x) dregular = dak.from_awkward(regular, 3) From 65e5502885a500e9ce5c2a9edb30bd8c3581f272 Mon Sep 17 00:00:00 2001 From: Doug Davis Date: Thu, 22 Jun 2023 18:35:31 -0500 Subject: [PATCH 4/7] bump awkward requirement (#297) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 14f25803..f1b2a144 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ classifiers = [ "Topic :: Scientific/Engineering", ] dependencies = [ - "awkward >=2.2.2", + "awkward >=2.2.4", "dask >=2023.04.0", ] dynamic = ["version"] From 7619d9719f3bdacb69b14d0324b6d6f057b39171 Mon Sep 17 00:00:00 2001 From: Iason Krommydas Date: Fri, 23 Jun 2023 22:53:01 +0300 Subject: [PATCH 5/7] feat: add `drop_none` method (#298) --- src/dask_awkward/lib/__init__.py | 1 + src/dask_awkward/lib/structure.py | 23 +++++++++++++++++++++++ tests/test_structure.py | 10 ++++++++++ 3 files changed, 34 insertions(+) diff --git a/src/dask_awkward/lib/__init__.py b/src/dask_awkward/lib/__init__.py index 9e8b0d49..bdb90cb9 100644 --- a/src/dask_awkward/lib/__init__.py +++ b/src/dask_awkward/lib/__init__.py @@ -47,6 +47,7 @@ cartesian, combinations, copy, + drop_none, fill_none, firsts, flatten, diff --git a/src/dask_awkward/lib/structure.py b/src/dask_awkward/lib/structure.py index 6c2d3371..6ab7d0b2 100644 --- a/src/dask_awkward/lib/structure.py +++ b/src/dask_awkward/lib/structure.py @@ -29,6 +29,7 @@ "cartesian", "combinations", "copy", + "drop_none", "fill_none", "firsts", "flatten", @@ -334,6 +335,28 @@ def fill_none( return map_partitions(fn, array, label="fill-none", output_divisions=1) +class _DropNoneFn: + def __init__(self, **kwargs): + self.kwargs = kwargs + + def __call__(self, arr): + return ak.drop_none(arr, **self.kwargs) + + +@borrow_docstring(ak.drop_none) +def drop_none( + array: Array, + axis: int | None = None, + highlevel: bool = True, + behavior: dict | None = None, +) -> Array: + if not highlevel: + raise ValueError("Only highlevel=True is supported") + + fn = _DropNoneFn(axis=axis, highlevel=highlevel, behavior=behavior) + return map_partitions(fn, array, label="drop-none", output_divisions=1) + + class _FirstsFn: def __init__(self, **kwargs): self.kwargs = kwargs diff --git a/tests/test_structure.py b/tests/test_structure.py index 48d7acba..009f33d9 100644 --- a/tests/test_structure.py +++ b/tests/test_structure.py @@ -138,6 +138,16 @@ def test_fill_none(vf: int | float | str, axis: int | None) -> None: assert_eq(d, e, check_forms=(not isinstance(vf, str))) +@pytest.mark.parametrize("axis", [None, 0, 1, -1]) +def test_drop_none(axis: int) -> None: + a = [[1, 2, None], [], [None], [5, 6, 7, None], [1, 2], None] + b = [[None, 2, 1], [None], [], None, [7, 6, None, 5], [None, None]] + c = dak.from_lists([a, b]) + d = dak.drop_none(c) + e = ak.drop_none(ak.from_iter(a + b)) + assert_eq(d, e) + + @pytest.mark.parametrize("axis", [0, 1, -1]) def test_is_none(axis: int) -> None: a: list[Any] = [[1, 2, None], None, None, [], [None], [5, 6, 7, None], [1, 2], None] From bbc477ee4d7303dcfaa5ae614a4a719da6cdf53e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 27 Jun 2023 10:33:58 -0500 Subject: [PATCH 6/7] [pre-commit.ci] pre-commit autoupdate (#302) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/charliermarsh/ruff-pre-commit: v0.0.272 → v0.0.275](https://github.com/charliermarsh/ruff-pre-commit/compare/v0.0.272...v0.0.275) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9d1740b0..f4e6578d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: - --target-version=py38 - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.272 + rev: v0.0.275 hooks: - id: ruff From 868aa9027aa440d3b0b7a95f66a1751b9bce3958 Mon Sep 17 00:00:00 2001 From: Doug Davis Date: Tue, 27 Jun 2023 13:29:05 -0500 Subject: [PATCH 7/7] fix: more cases where `output_divisions=1` should be used (#303) --- src/dask_awkward/lib/structure.py | 7 +++++-- tests/test_core.py | 18 +++++++++++++++++- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/dask_awkward/lib/structure.py b/src/dask_awkward/lib/structure.py index 6ab7d0b2..63453d9c 100644 --- a/src/dask_awkward/lib/structure.py +++ b/src/dask_awkward/lib/structure.py @@ -454,6 +454,7 @@ def full_like(array, fill_value, highlevel=True, behavior=None, dtype=None): highlevel=highlevel, behavior=behavior, dtype=dtype, + output_divisions=1, ) @@ -477,6 +478,7 @@ def isclose( highlevel=highlevel, behavior=behavior, label="is-close", + output_divisions=1, ) @@ -564,6 +566,7 @@ def num( axis=axis, highlevel=highlevel, behavior=behavior, + output_divisions=1, ) if axis == 0: return len(array) @@ -582,10 +585,10 @@ def ones_like( return map_partitions( ak.ones_like, array, - output_divisions=1, label="ones-like", behavior=behavior, dtype=dtype, + output_divisions=1, ) @@ -990,10 +993,10 @@ def zeros_like( return map_partitions( ak.zeros_like, array, - output_divisions=1, label="zeros-like", behavior=behavior, dtype=dtype, + output_divisions=1, ) diff --git a/tests/test_core.py b/tests/test_core.py index cc132b4b..c50d2d22 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -463,6 +463,23 @@ def test_compatible_partitions(ndjson_points_file: str) -> None: assert compatible_partitions(y, y) +def test_compatible_partitions_after_slice() -> None: + a = [[1, 2, 3], [4, 5]] + b = [[5, 6, 7, 8], [], [9]] + lazy = dak.from_lists([a, b]) + ccrt = ak.Array(a + b) + + # sanity + assert_eq(lazy, ccrt) + + # sanity + assert compatible_partitions(lazy, lazy + 2) + assert compatible_partitions(lazy, dak.num(lazy, axis=1) > 2) + + assert not compatible_partitions(lazy[:-2], lazy) + assert not compatible_partitions(lazy[:-2], dak.num(lazy, axis=1) != 3) + + @pytest.mark.parametrize("meta", [5, False, [1, 2, 3]]) def test_bad_meta_type(ndjson_points_file: str, meta: Any) -> None: with pytest.raises(TypeError, match="meta must be an instance of an Awkward Array"): @@ -522,7 +539,6 @@ def test_scalar_persist_and_rebuild(daa: Array) -> None: def test_output_divisions(daa: Array) -> None: assert dak.max(daa.points.y, axis=1).divisions == daa.divisions - assert dak.num(daa.points.y, axis=1).divisions == (None,) * (daa.npartitions + 1) assert daa["points"][["x", "y"]].divisions == daa.divisions assert daa["points"].divisions == daa.divisions