Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: use ak.typetracer.XX high-level functions #351

Merged
merged 10 commits into from
Sep 6, 2023
1 change: 0 additions & 1 deletion src/dask_awkward/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
compatible_partitions,
map_partitions,
partition_compatibility,
typetracer_from_form,
)
from dask_awkward.lib.describe import fields
from dask_awkward.lib.inspect import necessary_columns, sample
Expand Down
13 changes: 4 additions & 9 deletions src/dask_awkward/layers/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,17 +138,12 @@ def mock(self) -> tuple[AwkwardInputLayer, TypeTracerReport]:
from dask_awkward.lib._utils import set_form_keys

starting_form = copy.deepcopy(self._meta.layout.form)
starting_layout = starting_form.length_zero_array(highlevel=False)
new_meta = ak.Array(
starting_layout.to_typetracer(forget_length=True),
behavior=self._behavior,
)
form = new_meta.layout.form
set_form_keys(starting_form, key=self.name)

set_form_keys(form, key=self.name)
new_meta_array, report = ak.typetracer.typetracer_with_report(
starting_form, highlevel=True, behavior=self._behavior
)

new_meta_labelled, report = ak.typetracer.typetracer_with_report(form)
new_meta_array = ak.Array(new_meta_labelled, behavior=self._behavior)
new_input_layer = AwkwardInputLayer(
name=self.name,
columns=self.columns,
Expand Down
1 change: 0 additions & 1 deletion src/dask_awkward/lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
compatible_partitions,
map_partitions,
partition_compatibility,
typetracer_from_form,
)
from dask_awkward.lib.describe import fields
from dask_awkward.lib.inspect import necessary_columns, sample
Expand Down
25 changes: 2 additions & 23 deletions src/dask_awkward/lib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2051,10 +2051,7 @@ def to_meta(objects):

def length_zero_array_or_identity(obj: Any) -> Any:
if is_awkward_collection(obj):
return ak.Array(
obj._meta.layout.form.length_zero_array(highlevel=False),
behavior=obj.behavior,
)
return ak.typetracer.length_zero_if_typetracer(obj._meta, behavior=obj.behavior)
return obj


Expand Down Expand Up @@ -2090,7 +2087,7 @@ def map_meta(fn: ArgsKwargsPackedFunction, *deps: Any) -> ak.Array | None:
pass
try:
arg_lzas = to_length_zero_arrays(deps)
meta = typetracer_from_form(fn(*arg_lzas).layout.form)
meta = ak.typetracer.typetracer_from_form(fn(*arg_lzas).layout.form)
return meta
except Exception:
# if compute-unknown-meta is True and we've gotten to this
Expand Down Expand Up @@ -2204,24 +2201,6 @@ def normalize_single_outer_inner_index(
return (int(partition_index), int(new_index))


def typetracer_from_form(form: Form) -> ak.Array:
"""Create a typetracer Array from an awkward form.

Parameters
----------
form : awkward.form.Form
Form that the resulting Array will have.

Returns
-------
awkward.Array
Resulting highlevel typetracer Array

"""
layout = form.length_zero_array(highlevel=False)
return ak.Array(layout.to_typetracer(forget_length=True))


def make_unknown_length(array: ak.Array) -> ak.Array:
"""Make any highlevel Array a highlevel typetracer Array with unknown length.

Expand Down
2 changes: 1 addition & 1 deletion src/dask_awkward/lib/io/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ def to_dataframe(
**kwargs,
)
meta = ak.to_dataframe(
array._meta.layout.form.length_zero_array(highlevel=False), **kwargs
ak.typetracer.length_zero_if_typetracer(array._meta), **kwargs
)
return new_dd_object(
intermediate.dask,
Expand Down
7 changes: 2 additions & 5 deletions src/dask_awkward/lib/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,10 +302,7 @@ def from_parquet(
if split_row_groups is None:
split_row_groups = row_counts is not None and len(row_counts) > 1

meta = ak.Array(
subform.length_zero_array(highlevel=False).to_typetracer(forget_length=True),
behavior=behavior,
)
meta = ak.typetracer.typetracer_from_form(subform, behavior=behavior)

if split_row_groups is False or subrg is None:
# file-wise
Expand All @@ -324,7 +321,7 @@ def from_parquet(
actual_paths,
label=label,
token=token,
meta=typetracer_array(meta),
meta=meta,
)
else:
# row-group wise
Expand Down
9 changes: 4 additions & 5 deletions tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
except ImportError:
import json # type: ignore[no-redef]

from awkward.typetracer import unknown_length

import dask_awkward as dak
from dask_awkward.lib.testutils import assert_eq

Expand Down Expand Up @@ -405,8 +407,5 @@ def test_from_awkward_empty_array(daa) -> None:
assert len(a1) == 0

# with a form
c2 = ak.Array(daa.layout.form.length_zero_array(highlevel=False))
assert len(c2) == 0
a2 = dak.from_awkward(c2, npartitions=1)
assert len(a2) == 0
daa.layout.form == a2.layout.form
c2 = ak.typetracer.typetracer_from_form(daa.layout.form)
assert c2.layout.length is unknown_length
Loading