pandas-dev · Dr-Irv · Oct 31, 2024 · Oct 13, 2024 · Oct 16, 2024 · Oct 16, 2024
diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi
@@ -12,7 +12,9 @@ from re import Pattern
 from typing import (
     Any,
     ClassVar,
+    Generic,
     Literal,
+    TypeVar,
     overload,
 )
 
@@ -25,7 +27,10 @@ from pandas import (
 )
 from pandas.core.arraylike import OpsMixin
 from pandas.core.generic import NDFrame
-from pandas.core.groupby.generic import DataFrameGroupBy
+from pandas.core.groupby.generic import (
+    DataFrameGroupBy,
+    SeriesGroupBy,
+)
 from pandas.core.groupby.grouper import Grouper
 from pandas.core.indexers import BaseIndexer
 from pandas.core.indexes.base import Index
@@ -74,6 +79,7 @@ from pandas._typing import (
     Axis,
     AxisColumn,
     AxisIndex,
+    ByT,
     CalculationMethod,
     ColspaceArgType,
     CompressionOptions,
@@ -229,6 +235,11 @@ class _LocIndexerFrame(_LocIndexer):
         value: Scalar | NAType | NaTType | ArrayLike | Series | list | None,
     ) -> None: ...
 
+_TT = TypeVar("_TT", bound=Literal[True, False])  # literal type of the `as_index` argument
+
+class DataFrameGroupByGen(DataFrameGroupBy[ByT], Generic[ByT, _TT]): ...  # DataFrameGroupBy that also carries `as_index`
+class SeriesGroupByGen(SeriesGroupBy, Generic[_TT, ByT]): ...  # NOTE(review): parameter order is reversed vs. DataFrameGroupByGen - confirm intended
+
 class DataFrame(NDFrame, OpsMixin):
     __hash__: ClassVar[None]  # type: ignore[assignment]
 
@@ -1052,17 +1063,29 @@ class DataFrame(NDFrame, OpsMixin):
         errors: IgnoreRaise = ...,
     ) -> None: ...
     @overload
+    def groupby(  # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload]
+        self,
+        by: Scalar,
+        axis: AxisIndex | NoDefault = ...,
+        level: IndexLabel | None = ...,
+        as_index: Literal[True] = True,
+        sort: _bool = ...,
+        group_keys: _bool = ...,
+        observed: _bool | NoDefault = ...,
+        dropna: _bool = ...,
+    ) -> DataFrameGroupByGen[Scalar, Literal[True]]: ...  # `as_index` recorded as Literal[True]
+    @overload
     def groupby(
         self,
         by: Scalar,
         axis: AxisIndex | NoDefault = ...,
         level: IndexLabel | None = ...,
-        as_index: _bool = ...,
+        as_index: Literal[False] = ...,  # NOTE(review): defaulted here too, so a call omitting `as_index` matches the preceding overload first
         sort: _bool = ...,
         group_keys: _bool = ...,
         observed: _bool | NoDefault = ...,
         dropna: _bool = ...,
-    ) -> DataFrameGroupBy[Scalar]: ...
+    ) -> DataFrameGroupByGen[Scalar, Literal[False]]: ...  # `as_index` recorded as Literal[False]
     @overload
     def groupby(
         self,

diff --git a/pandas-stubs/core/groupby/groupby.pyi b/pandas-stubs/core/groupby/groupby.pyi
@@ -18,7 +18,10 @@ from typing import (
 
 import numpy as np
 from pandas.core.base import SelectionMixin
-from pandas.core.frame import DataFrame
+from pandas.core.frame import (
+    DataFrame,
+    DataFrameGroupByGen,
+)
 from pandas.core.groupby import (
     generic,
     ops,
@@ -53,6 +56,7 @@ from pandas._typing import (
     AnyArrayLike,
     Axis,
     AxisInt,
+    ByT,
     CalculationMethod,
     Dtype,
     Frequency,
@@ -235,8 +239,12 @@ class GroupBy(BaseGroupBy[NDFrameT]):
     @final
     @overload
     def size(self: GroupBy[Series]) -> Series[int]: ...
+    @overload  # DataFrame groupby known to have as_index=True
+    def size(self: DataFrameGroupByGen[ByT, Literal[True]]) -> Series[int]: ...  # type: ignore[misc]
+    @overload  # DataFrame groupby known to have as_index=False
+    def size(self: DataFrameGroupByGen[ByT, Literal[False]]) -> DataFrame: ...  # type: ignore[misc]
     @overload  # return type depends on `as_index` for dataframe groupby
     def size(self: GroupBy[DataFrame]) -> DataFrame | Series[int]: ...
     @final
     def sum(
         self,

diff --git a/tests/test_frame.py b/tests/test_frame.py
@@ -1025,6 +1025,24 @@ def test_types_pivot_table() -> None:
     )
 
 
+def test_types_groupby_as_index() -> None:
+    """Check that `size()` is typed according to the `as_index` argument."""
+    df = pd.DataFrame({"a": [1, 2, 3]})
+    check(
+        assert_type(df.groupby("a", as_index=False).size(), pd.DataFrame),
+        pd.DataFrame,
+    )
+    check(
+        assert_type(df.groupby("a", as_index=True).size(), "pd.Series[int]"),
+        pd.Series,
+    )
+    # Omitting `as_index` must resolve to the `as_index=True` overload.
+    check(
+        assert_type(df.groupby("a").size(), "pd.Series[int]"),
+        pd.Series,
+    )
+
+
 def test_types_groupby() -> None:
     df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5], "col3": [0, 1, 0]})
     df.index.name = "ind"