Skip to content

Commit

Permalink
Merge pull request #535 from dask-contrib/pre-commit-ci-update-config
Browse files Browse the repository at this point in the history
[pre-commit.ci] pre-commit autoupdate
  • Loading branch information
martindurant authored Aug 21, 2024
2 parents 758e87b + dbd4964 commit 5734d6b
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 122 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ repos:
- --target-version=py312

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.5.7
rev: v0.6.1
hooks:
- id: ruff

Expand Down
3 changes: 1 addition & 2 deletions docs/examples/20211111.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
"metadata": {},
"outputs": [],
"source": [
"import dask_awkward as dak\n",
"import dask_awkward.data as dakd"
]
},
Expand Down Expand Up @@ -408,7 +407,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.8"
"version": "3.10.9"
}
},
"nbformat": 4,
Expand Down
100 changes: 37 additions & 63 deletions docs/examples/io-tutorial/io-00-basic.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,47 +23,21 @@
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">package versions:\n",
"</pre>\n"
],
"text/plain": [
"package versions:\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">awkward: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2.5</span>.<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span>\n",
"</pre>\n"
],
"text/plain": [
"awkward: \u001b[1;36m2.5\u001b[0m.\u001b[1;36m2\u001b[0m\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">dask-awkward: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2024.1</span>.<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.</span>dev6+gda37bea\n",
"</pre>\n"
],
"text/plain": [
"dask-awkward: \u001b[1;36m2024.1\u001b[0m.\u001b[1;36m3.\u001b[0mdev6+gda37bea\n"
]
},
"metadata": {},
"output_type": "display_data"
"name": "stdout",
"output_type": "stream",
"text": [
"package versions:\n",
"awkward: 2.6.7\n",
"dask-awkward: 2024.3.1.dev50+gb593f87.d20240522\n"
]
}
],
"source": [
"from __future__ import annotations\n",
"\n",
"import os\n",
"\n",
"import numpy as np\n",
"import awkward\n",
"import dask_awkward\n",
"print(\"package versions:\")\n",
Expand Down Expand Up @@ -345,13 +319,13 @@
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m~/software/repos/dask-awkward/src/dask_awkward/lib/core.py:1508\u001b[0m, in \u001b[0;36mArray.__getattr__\u001b[0;34m(self, attr)\u001b[0m\n\u001b[1;32m 1507\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1508\u001b[0m cls_method \u001b[38;5;241m=\u001b[39m \u001b[43mgetattr_static\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_meta\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mattr\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1509\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m:\n",
"File \u001b[0;32m~/.pyenv/versions/3.11.7/lib/python3.11/inspect.py:1853\u001b[0m, in \u001b[0;36mgetattr_static\u001b[0;34m(obj, attr, default)\u001b[0m\n\u001b[1;32m 1852\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m default\n\u001b[0;32m-> 1853\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(attr)\n",
"File \u001b[0;32m~/code/dask-awkward/src/dask_awkward/lib/core.py:1578\u001b[0m, in \u001b[0;36mArray.__getattr__\u001b[0;34m(self, attr)\u001b[0m\n\u001b[1;32m 1577\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1578\u001b[0m cls_method \u001b[38;5;241m=\u001b[39m \u001b[43mgetattr_static\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_meta\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mattr\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1579\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m:\n",
"File \u001b[0;32m~/conda/envs/py310/lib/python3.10/inspect.py:1777\u001b[0m, in \u001b[0;36mgetattr_static\u001b[0;34m(obj, attr, default)\u001b[0m\n\u001b[1;32m 1776\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m default\n\u001b[0;32m-> 1777\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(attr)\n",
"\u001b[0;31mAttributeError\u001b[0m: distance",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[9], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdataset\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mscoring\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdistance\u001b[49m\n",
"File \u001b[0;32m~/software/repos/dask-awkward/src/dask_awkward/lib/core.py:1510\u001b[0m, in \u001b[0;36mArray.__getattr__\u001b[0;34m(self, attr)\u001b[0m\n\u001b[1;32m 1508\u001b[0m cls_method \u001b[38;5;241m=\u001b[39m getattr_static(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_meta, attr)\n\u001b[1;32m 1509\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m:\n\u001b[0;32m-> 1510\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mattr\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not in fields.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1511\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1512\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(cls_method, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_dask_get\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n",
"File \u001b[0;32m~/code/dask-awkward/src/dask_awkward/lib/core.py:1580\u001b[0m, in \u001b[0;36mArray.__getattr__\u001b[0;34m(self, attr)\u001b[0m\n\u001b[1;32m 1578\u001b[0m cls_method \u001b[38;5;241m=\u001b[39m getattr_static(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_meta, attr)\n\u001b[1;32m 1579\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m:\n\u001b[0;32m-> 1580\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mattr\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not in fields.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1581\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1582\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(cls_method, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_dask_get\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n",
"\u001b[0;31mAttributeError\u001b[0m: distance not in fields."
]
}
Expand All @@ -370,7 +344,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 10,
"id": "63d11eb1-f822-4fc9-ae0b-c10fb6c8ea32",
"metadata": {},
"outputs": [],
Expand All @@ -390,7 +364,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 11,
"id": "ad88a084-6d83-4eb7-a4a8-7befe58543d5",
"metadata": {},
"outputs": [],
Expand All @@ -400,7 +374,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 12,
"id": "532776bb-0789-45d0-9bd8-d108d5143f1a",
"metadata": {
"scrolled": true
Expand All @@ -412,7 +386,7 @@
"dask.awkward<to-json, type=Scalar, dtype=float64>"
]
},
"execution_count": 16,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -431,7 +405,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 13,
"id": "fa5d00ee-2ec1-455e-b0e8-4c64f6e8d36a",
"metadata": {},
"outputs": [],
Expand All @@ -449,7 +423,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 14,
"id": "a59ea8ad-8ca6-444c-86cd-a4a4d9fc853d",
"metadata": {},
"outputs": [],
Expand All @@ -459,7 +433,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 15,
"id": "60ef65b6-793e-40df-b2fa-f8c74b2ee8d0",
"metadata": {},
"outputs": [
Expand All @@ -469,7 +443,7 @@
"dask.awkward<from-json-files, npartitions=15>"
]
},
"execution_count": 19,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -505,14 +479,14 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 16,
"id": "62fcb593-63d5-4444-9d26-d0e23258f501",
"metadata": {},
"outputs": [],
"source": [
"dataset = dak.from_parquet(pq_dir)\n",
"free_throws = dak.str.match_substring(dataset.scoring.basket, \"freethrow\")\n",
"distances = dataset.scoring.distance[free_throws == False]\n",
"distances = dataset.scoring.distance[np.equal(free_throws, False)]\n",
"result = dak.mean(distances, axis=1)"
]
},
Expand All @@ -526,7 +500,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 17,
"id": "840b3ebe-1454-4dca-bee0-50a31f9c0df8",
"metadata": {
"scrolled": true
Expand All @@ -535,11 +509,11 @@
{
"data": {
"text/plain": [
"{'from-parquet-b7916bd949c3744cf0ec38dea00d0bd6': frozenset({'scoring.basket',\n",
"{'from-parquet-ab79c1929a2f8819e9ef6b725d844f8b': frozenset({'scoring.basket',\n",
" 'scoring.distance'})}"
]
},
"execution_count": 21,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -562,20 +536,20 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 18,
"id": "13914bf9-1f45-4860-8dc7-ec8eeb746bc0",
"metadata": {},
"outputs": [],
"source": [
"dataset = dak.from_json(os.path.join(\"data\", \"json\"))\n",
"free_throws = dak.str.match_substring(dataset.scoring.basket, \"freethrow\")\n",
"distances = dataset.scoring.distance[free_throws == False]\n",
"distances = dataset.scoring.distance[np.equal(free_throws, False)]\n",
"result = dak.mean(distances, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 19,
"id": "1bc6e94b-ee5e-42d1-b789-6f80859b1d64",
"metadata": {
"scrolled": true
Expand All @@ -584,11 +558,11 @@
{
"data": {
"text/plain": [
"{'from-json-files-6eebaf87f3a09a08c1234137dd381b61': frozenset({'scoring.basket',\n",
"{'from-json-files-3542a860e83d7f93e632ec19911d7030': frozenset({'scoring.basket',\n",
" 'scoring.distance'})}"
]
},
"execution_count": 23,
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -611,7 +585,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 20,
"id": "e444fa35-03ee-4292-8730-490dacd145fb",
"metadata": {},
"outputs": [],
Expand All @@ -626,7 +600,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 21,
"id": "146a84b4-26ce-45c5-ad16-9c8967b60214",
"metadata": {},
"outputs": [
Expand All @@ -642,7 +616,7 @@
" 'distance': {'type': 'number'}}}}}}"
]
},
"execution_count": 25,
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -661,7 +635,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 22,
"id": "f2d75df4-c8a7-4abd-942c-f1e94c124ec7",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -697,7 +671,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
"version": "3.10.9"
}
},
"nbformat": 4,
Expand Down
5 changes: 3 additions & 2 deletions docs/examples/io-tutorial/io-01-advanced.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
"source": [
"from __future__ import annotations\n",
"\n",
"import os\n",
"from typing import Any\n",
"\n",
"import awkward as ak\n",
Expand All @@ -57,7 +58,7 @@
"class Ignore0ParquetReader(ColumnProjectionMixin):\n",
" def __init__(\n",
" self,\n",
" form: Form,\n",
" form: ak.forms.Form,\n",
" report: bool = False,\n",
" allowed_exceptions: tuple[type[BaseException], ...] = (OSError,),\n",
" columns: list[str] | None = None,\n",
Expand Down Expand Up @@ -347,7 +348,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
"version": "3.10.9"
}
},
"nbformat": 4,
Expand Down
56 changes: 24 additions & 32 deletions src/dask_awkward/lib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2267,50 +2267,42 @@ def non_trivial_reduction(
else:
prepared_array = array

chunked_fn = _chunk_reducer_non_positional
tree_node_fn = _chunk_reducer_non_positional
concat_fn = _concat_reducer_non_positional
finalize_fn = _finalise_reducer_non_positional

chunked_kwargs = {
"reducer": reducer,
"is_axis_none": axis is None,
"mask_identity": mask_identity,
}
tree_node_kwargs = {
"reducer": combiner,
"is_axis_none": axis is None,
"mask_identity": mask_identity,
}

concat_kwargs = {"is_axis_none": axis is None}
finalize_kwargs = {
"reducer": combiner,
"mask_identity": mask_identity,
"keepdims": keepdims,
"is_axis_none": axis is None,
}

from dask_awkward.layers import AwkwardTreeReductionLayer

token = token or tokenize(
array,
reducer,
combiner,
label,
dtype,
split_every,
chunked_kwargs,
tree_node_kwargs,
concat_kwargs,
finalize_kwargs,
axis,
mask_identity,
keepdims,
)
name_tree_node = f"{label}-tree-node-{token}"
name_finalize = f"{label}-finalize-{token}"

chunked_fn = partial(chunked_fn, **chunked_kwargs)
tree_node_fn = partial(tree_node_fn, **tree_node_kwargs)
concat_fn = partial(concat_fn, **concat_kwargs)
finalize_fn = partial(finalize_fn, **finalize_kwargs)
chunked_fn = partial(
_chunk_reducer_non_positional,
reducer=reducer,
is_axis_none=axis is None,
mask_identity=mask_identity,
)
tree_node_fn = partial(
_chunk_reducer_non_positional,
reducer=combiner,
is_axis_none=axis is None,
mask_identity=mask_identity,
)
concat_fn = partial(_concat_reducer_non_positional, is_axis_none=axis is None)
finalize_fn = partial(
_finalise_reducer_non_positional,
reducer=combiner,
is_axis_none=axis is None,
keepdims=keepdims,
mask_identity=mask_identity,
)

if split_every is None:
split_every = dask.config.get("awkward.aggregation.split-every", 8)
Expand Down
Loading

0 comments on commit 5734d6b

Please sign in to comment.