From 2039bd7894ee651f3a63df1b3ea6421f00e6bdb5 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Wed, 21 Feb 2024 00:06:13 +0200 Subject: [PATCH] drop python 3.8 --- .github/workflows/tests.yml | 4 +- pyproject.toml | 3 +- src/dvc_objects/db.py | 13 +++--- src/dvc_objects/executors.py | 19 +++----- src/dvc_objects/fs/__init__.py | 6 +-- src/dvc_objects/fs/base.py | 81 ++++++++++++++++------------------ src/dvc_objects/fs/errors.py | 4 +- src/dvc_objects/fs/generic.py | 26 +++++------ src/dvc_objects/fs/utils.py | 17 +++---- src/dvc_objects/transfer.py | 6 +-- tests/fs/test_localfs.py | 5 +-- 11 files changed, 84 insertions(+), 100 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2138e1bd..1a495bf7 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -21,9 +21,9 @@ jobs: fail-fast: false matrix: os: [ubuntu-20.04, windows-latest, macos-latest] - pyv: ['3.8', '3.9', '3.10', '3.11', '3.12'] + pyv: ['3.9', '3.10', '3.11', '3.12'] include: - - {os: ubuntu-latest, pyv: 'pypy3.8'} + - {os: ubuntu-latest, pyv: 'pypy3.9'} - {os: macos-14, pyv: '3.12'} steps: diff --git a/pyproject.toml b/pyproject.toml index dba39ac9..8b782a75 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,14 +12,13 @@ license = {text = "Apache-2.0"} authors = [{ name = "Iterative", email = "support@dvc.org" }] classifiers = [ "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Development Status :: 4 - Beta", ] -requires-python = ">=3.8" +requires-python = ">=3.9" dynamic = ["version"] dependencies = [ "funcy>=1.14; python_version < '3.12'", diff --git a/src/dvc_objects/db.py b/src/dvc_objects/db.py index bb730f28..8cf44762 100644 --- a/src/dvc_objects/db.py +++ b/src/dvc_objects/db.py @@ -1,5 +1,6 @@ import itertools import logging 
+from collections.abc import Iterable, Iterator from contextlib import suppress from functools import partial from io import BytesIO @@ -7,11 +8,7 @@ TYPE_CHECKING, BinaryIO, Callable, - Iterable, - Iterator, - List, Optional, - Tuple, Union, cast, ) @@ -139,9 +136,9 @@ def add_bytes(self, oid: str, data: Union[bytes, BinaryIO]) -> None: def add( self, - path: Union["AnyFSPath", List["AnyFSPath"]], + path: Union["AnyFSPath", list["AnyFSPath"]], fs: "FileSystem", - oid: Union[str, List[str]], + oid: Union[str, list[str]], hardlink: bool = False, callback: "Callback" = DEFAULT_CALLBACK, check_exists: bool = True, @@ -206,7 +203,7 @@ def clear(self): for oid in self.all(): self.delete(oid) - def _oid_parts(self, oid: str) -> Tuple[str, str]: + def _oid_parts(self, oid: str) -> tuple[str, str]: return oid[:2], oid[2:] def oid_to_path(self, oid) -> str: @@ -218,7 +215,7 @@ def _list_prefixes( jobs: Optional[int] = None, ) -> Iterator[str]: if prefixes: - paths: Union[str, List[str]] = list(map(self.oid_to_path, prefixes)) + paths: Union[str, list[str]] = list(map(self.oid_to_path, prefixes)) if len(paths) == 1: paths = paths[0] prefix = True diff --git a/src/dvc_objects/executors.py b/src/dvc_objects/executors.py index 68e72124..3208f490 100644 --- a/src/dvc_objects/executors.py +++ b/src/dvc_objects/executors.py @@ -1,20 +1,13 @@ import asyncio import queue import sys +from collections.abc import Coroutine, Iterable, Iterator, Sequence from concurrent import futures from itertools import islice from typing import ( Any, Callable, - Coroutine, - Dict, - Iterable, - Iterator, - List, Optional, - Sequence, - Set, - Tuple, TypeVar, ) @@ -50,7 +43,7 @@ def imap_unordered( yield fn(*args) return - def create_taskset(n: int) -> Set[futures.Future]: + def create_taskset(n: int) -> set[futures.Future]: return {self.submit(fn, *args) for args in islice(it, n)} tasks = create_taskset(self.max_workers * 5) @@ -99,7 +92,7 @@ async def batch_coros( # noqa: C901 timeout: 
Optional[int] = None, return_exceptions: bool = False, nofiles: bool = False, -) -> List[Any]: +) -> list[Any]: """Run the given in coroutines in parallel. The asyncio loop will be kept saturated with up to `batch_size` tasks in @@ -116,12 +109,12 @@ async def batch_coros( # noqa: C901 batch_size = len(coros) assert batch_size > 0 - def create_taskset(n: int) -> Dict[asyncio.Task, int]: + def create_taskset(n: int) -> dict[asyncio.Task, int]: return {asyncio.create_task(coro): i for i, coro in islice(it, n)} - it: Iterator[Tuple[int, Coroutine]] = enumerate(coros) + it: Iterator[tuple[int, Coroutine]] = enumerate(coros) tasks = create_taskset(batch_size) - results: Dict[int, Any] = {} + results: dict[int, Any] = {} while tasks: done, pending = await asyncio.wait( tasks.keys(), timeout=timeout, return_when=asyncio.FIRST_COMPLETED diff --git a/src/dvc_objects/fs/__init__.py b/src/dvc_objects/fs/__init__.py index f17cc37b..4667766f 100644 --- a/src/dvc_objects/fs/__init__.py +++ b/src/dvc_objects/fs/__init__.py @@ -1,5 +1,5 @@ -from collections.abc import Mapping -from typing import TYPE_CHECKING, Iterator, Type +from collections.abc import Iterator, Mapping +from typing import TYPE_CHECKING from urllib.parse import urlparse from . 
import generic # noqa: F401 @@ -83,7 +83,7 @@ class Registry(Mapping): def __init__(self, reg) -> None: self._registry = reg - def __getitem__(self, key: str) -> Type["FileSystem"]: + def __getitem__(self, key: str) -> type["FileSystem"]: entry = self._registry.get(key) or self._registry[Schemes.LOCAL] try: return _import_class(entry["class"]) diff --git a/src/dvc_objects/fs/base.py b/src/dvc_objects/fs/base.py index e0a741c8..919cf0ef 100644 --- a/src/dvc_objects/fs/base.py +++ b/src/dvc_objects/fs/base.py @@ -5,6 +5,7 @@ import os import posixpath import shutil +from collections.abc import Iterable, Iterator, Sequence from functools import partial from multiprocessing import cpu_count from typing import ( @@ -12,14 +13,8 @@ TYPE_CHECKING, Any, ClassVar, - Dict, - Iterable, - Iterator, - List, Literal, Optional, - Sequence, - Tuple, Union, overload, ) @@ -49,7 +44,7 @@ # An info() entry, might evolve to a TypedDict # in the future (e.g for properly type 'size' etc). -Entry = Dict[str, Any] +Entry = dict[str, Any] class LinkError(OSError): @@ -68,7 +63,7 @@ class FileSystem: flavour = posixpath protocol = "base" - REQUIRES: ClassVar[Dict[str, str]] = {} + REQUIRES: ClassVar[dict[str, str]] = {} _JOBS = 4 * cpu_count() HASH_JOBS = max(1, min(4, cpu_count() // 2)) @@ -90,13 +85,13 @@ def __init__(self, fs=None, **kwargs: Any): self.fs = fs @cached_property - def fs_args(self) -> Dict[str, Any]: + def fs_args(self) -> dict[str, Any]: ret = {"skip_instance_cache": True} ret.update(self._prepare_credentials(**self._config)) return ret @property - def config(self) -> Dict[str, Any]: + def config(self) -> dict[str, Any]: return self._config @property @@ -114,11 +109,11 @@ def join(cls, *parts: str) -> str: return cls.flavour.join(*parts) @classmethod - def split(cls, path: str) -> Tuple[str, str]: + def split(cls, path: str) -> tuple[str, str]: return cls.flavour.split(path) @classmethod - def splitext(cls, path: str) -> Tuple[str, str]: + def splitext(cls, path: str) -> 
tuple[str, str]: return cls.flavour.splitext(path) def normpath(self, path: str) -> str: @@ -147,7 +142,7 @@ def commonpath(cls, paths: Iterable[str]) -> str: return cls.flavour.commonpath(list(paths)) @classmethod - def parts(cls, path: str) -> Tuple[str, ...]: + def parts(cls, path: str) -> tuple[str, ...]: drive, path = cls.flavour.splitdrive(path.rstrip(cls.flavour.sep)) ret = [] @@ -229,7 +224,7 @@ def relpath(self, path: str, start: Optional[str] = None) -> str: start = "." return self.flavour.relpath(self.abspath(path), start=self.abspath(start)) - def relparts(self, path: str, start: Optional[str] = None) -> Tuple[str, ...]: + def relparts(self, path: str, start: Optional[str] = None) -> tuple[str, ...]: return self.parts(self.relpath(path, start=start)) @classmethod @@ -252,7 +247,7 @@ def version_aware(self) -> bool: return self._config.get("version_aware", False) @staticmethod - def _get_kwargs_from_urls(urlpath: str) -> "Dict[str, Any]": + def _get_kwargs_from_urls(urlpath: str) -> "dict[str, Any]": from fsspec.utils import infer_storage_options options = infer_storage_options(urlpath) @@ -262,14 +257,14 @@ def _get_kwargs_from_urls(urlpath: str) -> "Dict[str, Any]": def _prepare_credentials( self, - **config: Dict[str, Any], - ) -> Dict[str, Any]: + **config: dict[str, Any], + ) -> dict[str, Any]: """Prepare the arguments for authentication to the host filesystem""" return {} @classmethod - def get_missing_deps(cls) -> List[str]: + def get_missing_deps(cls) -> list[str]: from importlib.util import find_spec return [pkg for pkg, mod in cls.REQUIRES.items() if not find_spec(mod)] @@ -339,21 +334,21 @@ def read_block( def cat( self, - path: Union[AnyFSPath, List[AnyFSPath]], + path: Union[AnyFSPath, list[AnyFSPath]], recursive: bool = False, on_error: Literal["raise", "omit", "return"] = "raise", **kwargs: Any, - ) -> Union[bytes, Dict[AnyFSPath, bytes]]: + ) -> Union[bytes, dict[AnyFSPath, bytes]]: return self.fs.cat(path, recursive=recursive, 
on_error=on_error, **kwargs) def cat_ranges( self, - paths: List[AnyFSPath], - starts: List[int], - ends: List[int], + paths: list[AnyFSPath], + starts: list[int], + ends: list[int], max_gap: Optional[int] = None, **kwargs, - ) -> List[bytes]: + ) -> list[bytes]: return self.fs.cat_ranges(paths, starts, ends, max_gap=max_gap, **kwargs) def cat_file( @@ -409,7 +404,7 @@ def write_text( def pipe( self, - path: Union[AnyFSPath, Dict[AnyFSPath, bytes]], + path: Union[AnyFSPath, dict[AnyFSPath, bytes]], value: Optional[bytes] = None, **kwargs: Any, ) -> None: @@ -440,18 +435,18 @@ def exists( @overload def exists( self, - path: List[AnyFSPath], + path: list[AnyFSPath], callback: fsspec.Callback = ..., batch_size: Optional[int] = ..., - ) -> List[bool]: + ) -> list[bool]: ... def exists( self, - path: Union[AnyFSPath, List[AnyFSPath]], + path: Union[AnyFSPath, list[AnyFSPath]], callback: fsspec.Callback = DEFAULT_CALLBACK, batch_size: Optional[int] = None, - ) -> Union[bool, List[bool]]: + ) -> Union[bool, list[bool]]: if isinstance(path, str): return self.fs.exists(path) callback.set_size(len(path)) @@ -525,7 +520,7 @@ def ls(self, path, detail=False, **kwargs): def find( self, - path: Union[AnyFSPath, List[AnyFSPath]], + path: Union[AnyFSPath, list[AnyFSPath]], prefix: bool = False, batch_size: Optional[int] = None, **kwargs, @@ -565,7 +560,7 @@ def rm_file(self, path: AnyFSPath) -> None: def rm( self, - path: Union[AnyFSPath, List[AnyFSPath]], + path: Union[AnyFSPath, list[AnyFSPath]], recursive: bool = False, **kwargs, ) -> None: @@ -586,10 +581,10 @@ def info( @overload def info( self, - path: List[AnyFSPath], + path: list[AnyFSPath], callback: fsspec.Callback = ..., batch_size: Optional[int] = ..., - ) -> List["Entry"]: + ) -> list["Entry"]: ... 
def info(self, path, callback=DEFAULT_CALLBACK, batch_size=None, **kwargs): @@ -667,7 +662,7 @@ def glob(self, path: AnyFSPath, **kwargs: Any): def size(self, path: AnyFSPath) -> Optional[int]: return self.fs.size(path) - def sizes(self, paths: List[AnyFSPath]) -> List[Optional[int]]: + def sizes(self, paths: list[AnyFSPath]) -> list[Optional[int]]: return self.fs.sizes(paths) def du( @@ -676,13 +671,13 @@ def du( total: bool = True, maxdepth: Optional[int] = None, **kwargs: Any, - ) -> Union[int, Dict[AnyFSPath, int]]: + ) -> Union[int, dict[AnyFSPath, int]]: return self.fs.du(path, total=total, maxdepth=maxdepth, **kwargs) def put( self, - from_info: Union[AnyFSPath, List[AnyFSPath]], - to_info: Union[AnyFSPath, List[AnyFSPath]], + from_info: Union[AnyFSPath, list[AnyFSPath]], + to_info: Union[AnyFSPath, list[AnyFSPath]], callback: fsspec.Callback = DEFAULT_CALLBACK, recursive: bool = False, batch_size: Optional[int] = None, @@ -714,8 +709,8 @@ def put_file(from_path, to_path): def get( self, - from_info: Union[AnyFSPath, List[AnyFSPath]], - to_info: Union[AnyFSPath, List[AnyFSPath]], + from_info: Union[AnyFSPath, list[AnyFSPath]], + to_info: Union[AnyFSPath, list[AnyFSPath]], callback: fsspec.Callback = DEFAULT_CALLBACK, recursive: bool = False, batch_size: Optional[int] = None, @@ -730,8 +725,8 @@ def get_file(rpath, lpath, **kwargs): self.fs.get_file(rpath, lpath, callback=child, **kwargs) if isinstance(from_info, list) and isinstance(to_info, list): - from_infos: List[AnyFSPath] = from_info - to_infos: List[AnyFSPath] = to_info + from_infos: list[AnyFSPath] = from_info + to_infos: list[AnyFSPath] = to_info else: assert isinstance(from_info, str) assert isinstance(to_info, str) @@ -797,7 +792,7 @@ def mkdir( def find( self, - path: Union[AnyFSPath, List[AnyFSPath]], + path: Union[AnyFSPath, list[AnyFSPath]], prefix: bool = False, batch_size: Optional[int] = None, **kwargs, @@ -807,7 +802,7 @@ def find( else: paths = path - def _make_args(paths: 
List[AnyFSPath]) -> Iterator[Tuple[str, str]]: + def _make_args(paths: list[AnyFSPath]) -> Iterator[tuple[str, str]]: for path in paths: if prefix and not path.endswith(self.flavour.sep): parent = self.parent(path) diff --git a/src/dvc_objects/fs/errors.py b/src/dvc_objects/fs/errors.py index d23812b5..4c57a838 100644 --- a/src/dvc_objects/fs/errors.py +++ b/src/dvc_objects/fs/errors.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, List, Optional +from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: from .base import FileSystem @@ -18,7 +18,7 @@ def __init__( fs: "FileSystem", protocol: str, url: str, - missing: Optional[List[str]] = None, + missing: Optional[list[str]] = None, ) -> None: self.protocol = protocol self.fs = fs diff --git a/src/dvc_objects/fs/generic.py b/src/dvc_objects/fs/generic.py index a25df9a6..71e7c799 100644 --- a/src/dvc_objects/fs/generic.py +++ b/src/dvc_objects/fs/generic.py @@ -4,7 +4,7 @@ import os from contextlib import suppress from functools import wraps -from typing import TYPE_CHECKING, Any, Callable, List, Optional, Union +from typing import TYPE_CHECKING, Any, Callable, Optional, Union from fsspec.asyn import get_loop from fsspec.callbacks import DEFAULT_CALLBACK @@ -68,9 +68,9 @@ def _link( def copy( from_fs: "FileSystem", - from_path: Union["AnyFSPath", List["AnyFSPath"]], + from_path: Union["AnyFSPath", list["AnyFSPath"]], to_fs: "FileSystem", - to_path: Union["AnyFSPath", List["AnyFSPath"]], + to_path: Union["AnyFSPath", list["AnyFSPath"]], callback: "Callback" = DEFAULT_CALLBACK, batch_size: Optional[int] = None, on_error: Optional[TransferErrorHandler] = None, @@ -133,9 +133,9 @@ def _copy_one(from_p: "AnyFSPath", to_p: "AnyFSPath"): def _put( # noqa: C901 - from_paths: List["AnyFSPath"], + from_paths: list["AnyFSPath"], to_fs: "FileSystem", - to_paths: List["AnyFSPath"], + to_paths: list["AnyFSPath"], callback: "Callback" = DEFAULT_CALLBACK, batch_size: Optional[int] = None, on_error: 
Optional[TransferErrorHandler] = None, @@ -199,8 +199,8 @@ async def put_coro(from_path, to_path, **kwargs): def _get( # noqa: C901 from_fs: "FileSystem", - from_paths: List["AnyFSPath"], - to_paths: List["AnyFSPath"], + from_paths: list["AnyFSPath"], + to_paths: list["AnyFSPath"], callback: "Callback" = DEFAULT_CALLBACK, batch_size: Optional[int] = None, on_error: Optional[TransferErrorHandler] = None, @@ -265,7 +265,7 @@ async def _get_one_coro(from_path: "AnyFSPath", to_path: "AnyFSPath"): def _try_links( - links: List["str"], + links: list["str"], from_fs: "FileSystem", from_path: "AnyFSPath", to_fs: "FileSystem", @@ -310,11 +310,11 @@ def _try_links( def transfer( # noqa: PLR0912, C901 from_fs: "FileSystem", - from_path: Union["AnyFSPath", List["AnyFSPath"]], + from_path: Union["AnyFSPath", list["AnyFSPath"]], to_fs: "FileSystem", - to_path: Union["AnyFSPath", List["AnyFSPath"]], + to_path: Union["AnyFSPath", list["AnyFSPath"]], hardlink: bool = False, - links: Optional[List["str"]] = None, + links: Optional[list["str"]] = None, callback: "Callback" = DEFAULT_CALLBACK, batch_size: Optional[int] = None, on_error: Optional[TransferErrorHandler] = None, @@ -405,12 +405,12 @@ def _test_link( def test_links( - links: List["str"], + links: list["str"], from_fs: "FileSystem", from_path: "AnyFSPath", to_fs: "FileSystem", to_path: "AnyFSPath", -) -> List["AnyFSPath"]: +) -> list["AnyFSPath"]: from .utils import tmp_fname from_file = from_fs.join(from_path, tmp_fname()) diff --git a/src/dvc_objects/fs/utils.py b/src/dvc_objects/fs/utils.py index 921972c4..f7e0e1f7 100644 --- a/src/dvc_objects/fs/utils.py +++ b/src/dvc_objects/fs/utils.py @@ -5,10 +5,11 @@ import stat import sys import threading +from collections.abc import Collection, Iterator from concurrent import futures from contextlib import contextmanager, suppress from secrets import token_urlsafe -from typing import TYPE_CHECKING, Any, Collection, Dict, Iterator, Optional, Set, Union +from typing import 
TYPE_CHECKING, Any, Optional, Union from fsspec.callbacks import DEFAULT_CALLBACK @@ -207,7 +208,7 @@ def exists( file_paths: Union["AnyFSPath", Collection["AnyFSPath"]], callback: "Callback" = DEFAULT_CALLBACK, batch_size: Optional[int] = None, -) -> Dict[str, bool]: +) -> dict[str, bool]: """Return batched fs.exists results. Runs batched fs.exists() calls in parallel with fs.ls() until all paths @@ -221,7 +222,7 @@ def exists( return {path: fs.exists(path)} paths_lock = threading.Lock() - results: Dict[str, bool] = {} + results: dict[str, bool] = {} results_lock = threading.Lock() callback.set_size(len(paths)) jobs = batch_size or fs.jobs @@ -260,9 +261,9 @@ def exists( def _exist_query( fs: "FileSystem", - paths: Set["AnyFSPath"], + paths: set["AnyFSPath"], paths_lock: threading.Lock, - results: Dict[str, bool], + results: dict[str, bool], results_lock: threading.Lock, batch_size: int, callback: "Callback", @@ -282,9 +283,9 @@ def _exist_query( def _list_query( fs: "FileSystem", - paths: Set["AnyFSPath"], + paths: set["AnyFSPath"], paths_lock: threading.Lock, - results: Dict[str, bool], + results: dict[str, bool], results_lock: threading.Lock, callback: "Callback", ): @@ -294,7 +295,7 @@ def _list_query( with paths_lock: if not paths: return - kwargs: Dict[str, Any] = {} + kwargs: dict[str, Any] = {} if fs.version_aware: kwargs["versions"] = True contents = fs.ls(parent, **kwargs) diff --git a/src/dvc_objects/transfer.py b/src/dvc_objects/transfer.py index 0ce56248..80079495 100644 --- a/src/dvc_objects/transfer.py +++ b/src/dvc_objects/transfer.py @@ -1,12 +1,12 @@ -from typing import TYPE_CHECKING, Optional, Set +from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: from .db import ObjectDB def transfer( - src: "ObjectDB", dest: "ObjectDB", oids: Set["str"], jobs: Optional[int] = None -) -> Set["str"]: + src: "ObjectDB", dest: "ObjectDB", oids: set["str"], jobs: Optional[int] = None +) -> set["str"]: src_exists = set(src.oids_exist(oids, jobs=jobs)) 
src_missing = oids - src_exists diff --git a/tests/fs/test_localfs.py b/tests/fs/test_localfs.py index 21e10dd3..a333bd0a 100644 --- a/tests/fs/test_localfs.py +++ b/tests/fs/test_localfs.py @@ -1,6 +1,5 @@ import os from os import fspath -from typing import Dict, List, Set, Tuple import pytest @@ -98,11 +97,11 @@ def test_walk(dir_path): def test_walk_detail(dir_path): fs = LocalFileSystem() - walk_results: List[Tuple[str, Dict[str, Dict], Dict[str, Dict]]] = list( + walk_results: list[tuple[str, dict[str, dict], dict[str, dict]]] = list( fs.walk(fspath(dir_path), detail=True) ) - expected: List[Tuple[str, Set[str], Set[str]]] = [ + expected: list[tuple[str, set[str], set[str]]] = [ (str(dir_path), {"data"}, {"code.py", "bar", "тест", "foo"}), (str(dir_path / "data"), {"sub"}, {"file"}), (str(dir_path / "data" / "sub"), set(), {"file"}),