Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH203 Split groupby with as_index (tentative) #1014

Merged
merged 9 commits into from
Oct 31, 2024
29 changes: 26 additions & 3 deletions pandas-stubs/core/frame.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ from re import Pattern
from typing import (
Any,
ClassVar,
Generic,
Literal,
TypeVar,
overload,
)

Expand All @@ -25,7 +27,10 @@ from pandas import (
)
from pandas.core.arraylike import OpsMixin
from pandas.core.generic import NDFrame
from pandas.core.groupby.generic import DataFrameGroupBy
from pandas.core.groupby.generic import (
DataFrameGroupBy,
SeriesGroupBy,
)
from pandas.core.groupby.grouper import Grouper
from pandas.core.indexers import BaseIndexer
from pandas.core.indexes.base import Index
Expand Down Expand Up @@ -74,6 +79,7 @@ from pandas._typing import (
Axis,
AxisColumn,
AxisIndex,
ByT,
CalculationMethod,
ColspaceArgType,
CompressionOptions,
Expand Down Expand Up @@ -229,6 +235,11 @@ class _LocIndexerFrame(_LocIndexer):
value: Scalar | NAType | NaTType | ArrayLike | Series | list | None,
) -> None: ...

# Type variable carrying the literal boolean used for `as_index`.
# The name string given to TypeVar must match the variable it is assigned to,
# otherwise type checkers reject the declaration.
_TT = TypeVar("_TT", bound=Literal[True, False])

# DataFrameGroupBy subclass with an extra type parameter `_TT` (bound to
# Literal[True, False]); the scalar-`by` `groupby` overloads fill it with the
# literal type of their `as_index` argument.
class DataFrameGroupByGen(DataFrameGroupBy[ByT], Generic[ByT, _TT]): ...
Dr-Irv marked this conversation as resolved.
Show resolved Hide resolved
# SeriesGroupBy subclass parameterized by `_TT` and `ByT`.
# NOTE(review): the type parameters are ordered [_TT, ByT] here but [ByT, _TT]
# on DataFrameGroupByGen, and the SeriesGroupBy base is left unsubscripted —
# confirm both are intentional.
class SeriesGroupByGen(SeriesGroupBy, Generic[_TT, ByT]): ...
Dr-Irv marked this conversation as resolved.
Show resolved Hide resolved

class DataFrame(NDFrame, OpsMixin):
__hash__: ClassVar[None] # type: ignore[assignment]

Expand Down Expand Up @@ -1052,17 +1063,29 @@ class DataFrame(NDFrame, OpsMixin):
errors: IgnoreRaise = ...,
) -> None: ...
# Scalar `by` with as_index=True (the declared default): the returned group-by
# records Literal[True] as its second type argument.
@overload
def groupby( # type: ignore[overload-overlap] # pyright: ignore reportOverlappingOverload
self,
by: Scalar,
axis: AxisIndex | NoDefault = ...,
level: IndexLabel | None = ...,
as_index: Literal[True] = True,
sort: _bool = ...,
group_keys: _bool = ...,
observed: _bool | NoDefault = ...,
dropna: _bool = ...,
) -> DataFrameGroupByGen[Scalar, Literal[True]]: ...
# Scalar `by` with as_index=False: the returned group-by records Literal[False]
# as its second type argument.
# NOTE(review): `as_index` still carries a default here, so a call that omits it
# can also match this overload — presumably why the preceding overload
# suppresses overlap diagnostics; confirm whether the default should be dropped.
@overload
def groupby(
self,
by: Scalar,
axis: AxisIndex | NoDefault = ...,
level: IndexLabel | None = ...,
as_index: _bool = ...,
as_index: Literal[False] = ...,
sort: _bool = ...,
group_keys: _bool = ...,
observed: _bool | NoDefault = ...,
dropna: _bool = ...,
) -> DataFrameGroupBy[Scalar]: ...
) -> DataFrameGroupByGen[Scalar, Literal[False]]: ...
@overload
def groupby(
self,
Expand Down
12 changes: 9 additions & 3 deletions pandas-stubs/core/groupby/groupby.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ from typing import (

import numpy as np
from pandas.core.base import SelectionMixin
from pandas.core.frame import DataFrame
from pandas.core.frame import (
DataFrame,
DataFrameGroupByGen,
)
from pandas.core.groupby import (
generic,
ops,
Expand Down Expand Up @@ -53,6 +56,7 @@ from pandas._typing import (
AnyArrayLike,
Axis,
AxisInt,
ByT,
CalculationMethod,
Dtype,
Frequency,
Expand Down Expand Up @@ -235,8 +239,10 @@ class GroupBy(BaseGroupBy[NDFrameT]):
# Series group-by: `size()` is typed as Series[int].
@final
@overload
def size(self: GroupBy[Series]) -> Series[int]: ...
@overload # return type depends on `as_index` for dataframe groupby
def size(self: GroupBy[DataFrame]) -> DataFrame | Series[int]: ...
# Frame group-by whose second type argument is Literal[True] (as_index=True):
# `size()` is typed as Series[int].
@overload
def size(self: DataFrameGroupByGen[ByT, Literal[True]]) -> Series[int]: ... # type: ignore[misc]
Dr-Irv marked this conversation as resolved.
Show resolved Hide resolved
# Frame group-by whose second type argument is Literal[False] (as_index=False):
# `size()` is typed as DataFrame.
@overload
def size(self: DataFrameGroupByGen[ByT, Literal[False]]) -> DataFrame: ... # type: ignore[misc]
@final
def sum(
self,
Expand Down
18 changes: 18 additions & 0 deletions tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1025,6 +1025,24 @@ def test_types_pivot_table() -> None:
)


def test_types_groupby_as_index() -> None:
    """Check how `size()` on a frame group-by is typed for each `as_index` value."""
    frame = pd.DataFrame({"a": [1, 2, 3]})

    # as_index=False: the result is asserted to be a DataFrame.
    flat_sizes = frame.groupby("a", as_index=False).size()
    check(assert_type(flat_sizes, pd.DataFrame), pd.DataFrame)

    # as_index=True: the result is asserted to be an integer Series.
    indexed_sizes = frame.groupby("a", as_index=True).size()
    check(assert_type(indexed_sizes, "pd.Series[int]"), pd.Series)


def test_types_groupby() -> None:
df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5], "col3": [0, 1, 0]})
df.index.name = "ind"
Expand Down
Loading