From d00cd9391fa8e85f8479a949e224e4e949884887 Mon Sep 17 00:00:00 2001
From: hollymandel <holly.mandel@berkeley.edu>
Date: Fri, 4 Oct 2024 16:14:39 -0700
Subject: [PATCH] typing: update type-ignore comments and annotate core/missing.py

---
 pyproject.toml                   |  3 +-
 xarray/coding/cftimeindex.py     |  2 +-
 xarray/core/dataarray.py         |  2 +-
 xarray/core/dataset.py           | 10 ++---
 xarray/core/extension_array.py   | 10 +++--
 xarray/core/indexes.py           | 12 +++---
 xarray/core/missing.py           | 66 +++++++++++++++-----------------
 xarray/core/utils.py             |  4 +-
 xarray/core/variable.py          |  2 +-
 xarray/groupers.py               |  2 +-
 xarray/namedarray/daskmanager.py |  8 ++--
 11 files changed, 61 insertions(+), 60 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index c23d12ffba1..a84946d4123 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -125,6 +125,7 @@ module = [
   "netCDF4.*",
   "netcdftime.*",
   "opt_einsum.*",
+  "pandas.*",
   "pint.*",
   "pooch.*",
   "pyarrow.*",
@@ -178,7 +179,7 @@ module = [
   "xarray.tests.test_units",
   "xarray.tests.test_utils",
   "xarray.tests.test_variable",
-  "xarray.tests.test_weighted",
+  "xarray.tests.test_weighted"
 ]
 
 # Use strict = true whenever namedarray has become standalone. In the meantime
diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py
index e85fa2736b2..5b11292ce30 100644
--- a/xarray/coding/cftimeindex.py
+++ b/xarray/coding/cftimeindex.py
@@ -517,7 +517,7 @@ def contains(self, key: Any) -> bool:
         """Needed for .loc based partial-string indexing"""
         return self.__contains__(key)
 
-    def shift(  # type: ignore[override]  # freq is typed Any, we are more precise
+    def shift(  # freq is typed Any, we are more precise
         self,
         periods: int | float,
         freq: str | timedelta | BaseCFTimeOffset | None = None,
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 8d460e492c6..e37e74e48a3 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -3032,7 +3032,7 @@ def to_unstacked_dataset(self, dim: Hashable, level: int | Hashable = 0) -> Data
         if not isinstance(idx, pd.MultiIndex):
             raise ValueError(f"'{dim}' is not a stacked coordinate")
 
-        level_number = idx._get_level_number(level)  # type: ignore[attr-defined]
+        level_number = idx._get_level_number(level)
         variables = idx.levels[level_number]
         variable_dim = idx.names[level_number]
 
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index a7dedd2ed07..28f0ce16c61 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -6629,7 +6629,7 @@ def interpolate_na(
             | None
         ) = None,
         **kwargs: Any,
-    ) -> Self:
+    ) -> Dataset:
         """Fill in NaNs by interpolating according to different methods.
 
         Parameters
@@ -6760,7 +6760,7 @@ def interpolate_na(
         )
         return new
 
-    def ffill(self, dim: Hashable, limit: int | None = None) -> Self:
+    def ffill(self, dim: Hashable, limit: int | None = None) -> Dataset:
         """Fill NaN values by propagating values forward
 
         *Requires bottleneck.*
@@ -6824,7 +6824,7 @@ def ffill(self, dim: Hashable, limit: int | None = None) -> Self:
         new = _apply_over_vars_with_dim(ffill, self, dim=dim, limit=limit)
         return new
 
-    def bfill(self, dim: Hashable, limit: int | None = None) -> Self:
+    def bfill(self, dim: Hashable, limit: int | None = None) -> Dataset:
         """Fill NaN values by propagating values backward
 
         *Requires bottleneck.*
@@ -7523,7 +7523,7 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> Self:
 
         if isinstance(idx, pd.MultiIndex):
             dims = tuple(
-                name if name is not None else "level_%i" % n  # type: ignore[redundant-expr]
+                name if name is not None else "level_%i" % n
                 for n, name in enumerate(idx.names)
             )
             for dim, lev in zip(dims, idx.levels, strict=True):
@@ -9829,7 +9829,7 @@ def eval(
             c        (x) float64 40B 0.0 1.25 2.5 3.75 5.0
         """
 
-        return pd.eval(  # type: ignore[return-value]
+        return pd.eval(
             statement,
             resolvers=[self],
             target=self,
diff --git a/xarray/core/extension_array.py b/xarray/core/extension_array.py
index b2efeae7bb0..7a6b30417b0 100644
--- a/xarray/core/extension_array.py
+++ b/xarray/core/extension_array.py
@@ -45,7 +45,7 @@ def __extension_duck_array__stack(arr: T_ExtensionArray, axis: int):
 def __extension_duck_array__concatenate(
     arrays: Sequence[T_ExtensionArray], axis: int = 0, out=None
 ) -> T_ExtensionArray:
-    return type(arrays[0])._concat_same_type(arrays)  # type: ignore[attr-defined]
+    return type(arrays[0])._concat_same_type(arrays)
 
 
 @implements(np.where)
@@ -57,8 +57,8 @@ def __extension_duck_array__where(
         and isinstance(y, pd.Categorical)
         and x.dtype != y.dtype
     ):
-        x = x.add_categories(set(y.categories).difference(set(x.categories)))  # type: ignore[assignment]
-        y = y.add_categories(set(x.categories).difference(set(y.categories)))  # type: ignore[assignment]
+        x = x.add_categories(set(y.categories).difference(set(x.categories)))
+        y = y.add_categories(set(x.categories).difference(set(y.categories)))
     return cast(T_ExtensionArray, pd.Series(x).where(condition, pd.Series(y)).array)
 
 
@@ -116,7 +116,9 @@ def __getitem__(self, key) -> PandasExtensionArray[T_ExtensionArray]:
         if is_extension_array_dtype(item):
             return type(self)(item)
         if np.isscalar(item):
-            return type(self)(type(self.array)([item]))  # type: ignore[call-arg]  # only subclasses with proper __init__ allowed
+            return type(self)(
+                type(self.array)([item])
+            )  # only subclasses with proper __init__ allowed
         return item
 
     def __setitem__(self, key, val):
diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py
index 5abc2129e3e..5e9af04d6b2 100644
--- a/xarray/core/indexes.py
+++ b/xarray/core/indexes.py
@@ -740,7 +740,7 @@ def isel(
             # scalar indexer: drop index
             return None
 
-        return self._replace(self.index[indxr])  # type: ignore[index]
+        return self._replace(self.index[indxr])
 
     def sel(
         self, labels: dict[Any, Any], method=None, tolerance=None
@@ -926,7 +926,7 @@ def remove_unused_levels_categories(index: T_PDIndex) -> T_PDIndex:
         return cast(T_PDIndex, new_index)
 
     if isinstance(index, pd.CategoricalIndex):
-        return index.remove_unused_categories()  # type: ignore[attr-defined]
+        return index.remove_unused_categories()
 
     return index
 
@@ -1164,7 +1164,7 @@ def create_variables(
                 dtype = None
             else:
                 level = name
-                dtype = self.level_coords_dtype[name]  # type: ignore[index]  # TODO: are Hashables ok?
+                dtype = self.level_coords_dtype[name]  # TODO: are Hashables ok?
 
             var = variables.get(name, None)
             if var is not None:
@@ -1174,7 +1174,9 @@ def create_variables(
                 attrs = {}
                 encoding = {}
 
-            data = PandasMultiIndexingAdapter(self.index, dtype=dtype, level=level)  # type: ignore[arg-type]  # TODO: are Hashables ok?
+            data = PandasMultiIndexingAdapter(
+                self.index, dtype=dtype, level=level
+            )  # TODO: are Hashables ok?
             index_vars[name] = IndexVariable(
                 self.dim,
                 data,
@@ -1671,7 +1673,7 @@ def copy_indexes(
                 convert_new_idx = False
                 xr_idx = idx
 
-            new_idx = xr_idx._copy(deep=deep, memo=memo)  # type: ignore[assignment]
+            new_idx = xr_idx._copy(deep=deep, memo=memo)
             idx_vars = xr_idx.create_variables(coords)
 
             if convert_new_idx:
diff --git a/xarray/core/missing.py b/xarray/core/missing.py
index 549d9754953..404767af3c6 100644
--- a/xarray/core/missing.py
+++ b/xarray/core/missing.py
@@ -29,7 +29,6 @@
 if TYPE_CHECKING:
     from xarray.core.dataarray import DataArray
     from xarray.core.dataset import Dataset
-    from xarray.core.variable import IndexVariable
 
 
 def _get_nan_block_lengths(
@@ -146,7 +145,7 @@ def __init__(
         copy: bool = False,
         bounds_error: bool = False,
         order: Optional[int] = None,
-        axis=-1,
+        axis: int = -1,
         **kwargs,
     ):
         from scipy.interpolate import interp1d
@@ -167,8 +166,6 @@ def __init__(
         self.cons_kwargs = kwargs
         self.call_kwargs = {}
 
-        nan = np.nan if yi.dtype.kind != "c" else np.nan + np.nan * 1j
-
         self.f = interp1d(
             xi,
             yi,
@@ -192,13 +189,13 @@ class SplineInterpolator(BaseInterpolator):
 
     def __init__(
         self,
-        xi,
-        yi,
-        method="spline",
-        fill_value=None,
-        order=3,
-        nu=0,
-        ext=None,
+        xi: Variable,
+        yi: np.ndarray,
+        method: Optional[str | int] = "spline",
+        fill_value: Optional[float | complex] = None,
+        order: int = 3,
+        nu: Optional[float] = 0,
+        ext: Optional[int | str] = None,
         **kwargs,
     ):
         from scipy.interpolate import UnivariateSpline
@@ -216,7 +213,9 @@ def __init__(
         self.f = UnivariateSpline(xi, yi, k=order, **self.cons_kwargs)
 
 
-def _apply_over_vars_with_dim(func, self, dim=None, **kwargs):
+def _apply_over_vars_with_dim(
+    func: Callable, self: Dataset, dim: Optional[Hashable] = None, **kwargs
+) -> Dataset:
     """Wrapper for datasets"""
     ds = type(self)(coords=self.coords, attrs=self.attrs)
 
@@ -606,7 +605,7 @@ def _floatize_x(x, new_x):
 
 def interp(
     var: Variable,
-    indexes_coords: dict[str, IndexVariable],
+    indexes_coords: dict[Hashable, tuple[Any, Any]],
     method: InterpOptions,
     **kwargs,
 ) -> Variable:
@@ -671,9 +670,9 @@ def interp(
 
 
 def interp_func(
-    var: np.ndarray,
-    x: list[IndexVariable],
-    new_x: list[IndexVariable],
+    var: DataArray,
+    x: tuple[Variable, ...],
+    new_x: tuple[Variable, ...],
     method: InterpOptions,
     kwargs: dict,
 ) -> np.ndarray:
@@ -683,13 +682,10 @@ def interp_func(
 
     Parameters
     ----------
-    var : np.ndarray or dask.array.Array
-        Array to be interpolated. The final dimension is interpolated.
-    x : a list of 1d array.
-        Original coordinates. Should not contain NaN.
-    new_x : a list of 1d array
-        New coordinates. Should not contain NaN.
-    method : string
+    var : Array to be interpolated. The final dimension is interpolated.
+    x : Original coordinates. Should not contain NaN.
+    new_x : New coordinates. Should not contain NaN.
+    method :
         {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'pchip', 'akima',
             'makima', 'barycentric', 'krogh'} for 1-dimensional interpolation.
         {'linear', 'nearest'} for multidimensional interpolation
@@ -710,7 +706,7 @@ def interp_func(
     scipy.interpolate.interp1d
     """
     if not x:
-        return var.copy()
+        return var.data.copy()
 
     if len(x) == 1:
         func, kwargs = _get_interpolator(method, vectorizeable_only=True, **kwargs)
@@ -727,11 +723,11 @@ def interp_func(
 
         # blockwise args format
         x_arginds = [[_x, (nconst + index,)] for index, _x in enumerate(x)]
-        x_arginds = [item for pair in x_arginds for item in pair]
+        x_arginds = [item for pair in x_arginds for item in pair]  # type: ignore[misc]
         new_x_arginds = [
             [_x, [ndim + index for index in range(_x.ndim)]] for _x in new_x
         ]
-        new_x_arginds = [item for pair in new_x_arginds for item in pair]
+        new_x_arginds = [item for pair in new_x_arginds for item in pair]  # type: ignore[misc]
 
         args = (var, range(ndim), *x_arginds, *new_x_arginds)
 
@@ -741,13 +737,13 @@ def interp_func(
             elem for pair in zip(rechunked, args[1::2], strict=True) for elem in pair
         )
 
-        new_x = rechunked[1 + (len(rechunked) - 1) // 2 :]
+        new_x = rechunked[1 + (len(rechunked) - 1) // 2 :]  # type: ignore[assignment]
 
         new_x0_chunks = new_x[0].chunks
         new_x0_shape = new_x[0].shape
         new_x0_chunks_is_not_none = new_x0_chunks is not None
         new_axes = {
-            ndim + i: new_x0_chunks[i] if new_x0_chunks_is_not_none else new_x0_shape[i]
+            ndim + i: new_x0_chunks[i] if new_x0_chunks_is_not_none else new_x0_shape[i]  # type: ignore[index]
             for i in range(new_x[0].ndim)
         }
 
@@ -757,7 +753,7 @@ def interp_func(
         # scipy.interpolate.interp1d always forces to float.
         # Use the same check for blockwise as well:
         if not issubclass(var.dtype.type, np.inexact):
-            dtype = float
+            dtype = np.dtype(float)
         else:
             dtype = var.dtype
 
@@ -772,18 +768,18 @@ def interp_func(
             localize=localize,
             concatenate=True,
             dtype=dtype,
-            new_axes=new_axes,
+            new_axes=new_axes,  # type: ignore[arg-type]
             meta=meta,
             align_arrays=False,
         )
 
-    return _interpnd(var, x, new_x, func, kwargs)
+    return _interpnd(var.data, x, new_x, func, kwargs)
 
 
 def _interp1d(
     var: np.ndarray,
-    x: IndexVariable,
-    new_x: IndexVariable,
+    x: Variable,
+    new_x: Variable,
     func: Callable,
     kwargs: dict,
 ) -> np.ndarray:
@@ -798,8 +794,8 @@ def _interp1d(
 
 def _interpnd(
     var: np.ndarray,
-    x: list[IndexVariable],
-    new_x: list[IndexVariable],
+    x: tuple[Variable, ...],
+    new_x: tuple[Variable, ...],
     func: Callable,
     kwargs: dict,
 ) -> np.ndarray:
diff --git a/xarray/core/utils.py b/xarray/core/utils.py
index e5168342e1e..7c09571d937 100644
--- a/xarray/core/utils.py
+++ b/xarray/core/utils.py
@@ -132,7 +132,7 @@ def get_valid_numpy_dtype(array: np.ndarray | pd.Index) -> np.dtype:
     if not is_valid_numpy_dtype(array.dtype):
         return np.dtype("O")
 
-    return array.dtype  # type: ignore[return-value]
+    return array.dtype
 
 
 def maybe_coerce_to_str(index, original_coords):
@@ -180,7 +180,7 @@ def equivalent(first: T, second: T) -> bool:
         return duck_array_ops.array_equiv(first, second)
     if isinstance(first, list) or isinstance(second, list):
         return list_equiv(first, second)  # type: ignore[arg-type]
-    return (first == second) or (pd.isnull(first) and pd.isnull(second))  # type: ignore[call-overload]
+    return (first == second) or (pd.isnull(first) and pd.isnull(second))
 
 
 def list_equiv(first: Sequence[T], second: Sequence[T]) -> bool:
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index d8cf0fe7550..492b5a8c68e 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -151,7 +151,7 @@ def as_variable(
             ) from error
     elif utils.is_scalar(obj):
         obj = Variable([], obj)
-    elif isinstance(obj, pd.Index | IndexVariable) and obj.name is not None:
+    elif isinstance(obj, pd.Index | IndexVariable) and obj.name is not None:  # type: ignore[redundant-expr]
         obj = Variable(obj.name, obj)
     elif isinstance(obj, set | dict):
         raise TypeError(f"variable {name!r} has invalid type {type(obj)!r}")
diff --git a/xarray/groupers.py b/xarray/groupers.py
index e4cb884e6de..ce614997a4d 100644
--- a/xarray/groupers.py
+++ b/xarray/groupers.py
@@ -296,7 +296,7 @@ def factorize(self, group: T_Group) -> EncodedGroups:
 
         data = np.asarray(group.data)  # Cast _DummyGroup data to array
 
-        binned, self.bins = pd.cut(  # type: ignore [call-overload]
+        binned, self.bins = pd.cut(
             data.ravel(),
             bins=self.bins,
             right=self.right,
diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py
index a056f4e00bd..32ec5ce6c88 100644
--- a/xarray/namedarray/daskmanager.py
+++ b/xarray/namedarray/daskmanager.py
@@ -21,13 +21,13 @@
     try:
         from dask.array import Array as DaskArray
     except ImportError:
-        DaskArray = np.ndarray[Any, Any]
+        DaskArray = np.ndarray[Any, Any]  # type: ignore[misc,assignment]
 
 
 dask_available = module_available("dask")
 
 
-class DaskManager(ChunkManagerEntrypoint["DaskArray"]):
+class DaskManager(ChunkManagerEntrypoint["DaskArray"]):  # type: ignore[type-var]
     array_cls: type[DaskArray]
     available: bool = dask_available
 
@@ -91,7 +91,7 @@ def array_api(self) -> Any:
 
         return da
 
-    def reduction(
+    def reduction(  # type: ignore[override]
         self,
         arr: T_ChunkedArray,
         func: Callable[..., Any],
@@ -113,7 +113,7 @@ def reduction(
             keepdims=keepdims,
         )  # type: ignore[no-untyped-call]
 
-    def scan(
+    def scan(  # type: ignore[override]
         self,
         func: Callable[..., Any],
         binop: Callable[..., Any],