From cb5c4a51212f626770bb2618b8d0c4e9d128fcb2 Mon Sep 17 00:00:00 2001 From: Konstantin Malanchev Date: Fri, 3 May 2024 15:31:34 -0400 Subject: [PATCH] Comments with github issues --- src/nested_pandas/series/ext_array.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/nested_pandas/series/ext_array.py b/src/nested_pandas/series/ext_array.py index 85bb731..71f1777 100644 --- a/src/nested_pandas/series/ext_array.py +++ b/src/nested_pandas/series/ext_array.py @@ -68,9 +68,7 @@ def to_pyarrow_dtype(dtype: NestedDtype | pd.ArrowDtype | pa.DataType | None) -> def replace_with_mask(array: pa.ChunkedArray, mask: pa.BooleanArray, value: pa.Array) -> pa.ChunkedArray: """Replace the elements of the array with the value where the mask is True""" # TODO: performance optimization - # It may be more performant to convert input arrays to a numpy array of PyArrow scalars and replace - # the values in numpy, then convert back to PyArrow. This way we can avoid the overhead of creating a - # large broadcast_value when we just need to replace a few values. + # https://github.com/lincc-frameworks/nested-pandas/issues/52 # If mask is [False, True, False, True], mask_cumsum will be [0, 1, 1, 2] # So we put value items to the right positions in broadcast_value, while duplicate some other items for @@ -169,8 +167,8 @@ def __getitem__(self, item): return type(self)(pa_array, validate=False) def __setitem__(self, key, value) -> None: - # TODO: optimize for many chunks: - # if key is not in some of the chunks, we can keep their original values + # TODO: optimize for many chunks + # https://github.com/lincc-frameworks/nested-pandas/issues/53 key = check_array_indexer(self, key) @@ -393,6 +391,7 @@ def copy(self) -> Self: # type: ignore[name-defined] # noqa: F821 def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]: # TODO: make formatted strings more pretty + # https://github.com/lincc-frameworks/nested-pandas/issues/50 if boxed: def box_formatter(value):