From a5dee767d1d67f604956c9b66e3dcd1773e674d6 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 6 May 2024 16:31:25 -0400 Subject: [PATCH] minor updates from pr-305 feedback --- dcicutils/file_utils.py | 20 ++++++++++++++----- dcicutils/http_utils.py | 44 ++++++++++++++++++++++++----------------- pyproject.toml | 2 +- 3 files changed, 42 insertions(+), 24 deletions(-) diff --git a/dcicutils/file_utils.py b/dcicutils/file_utils.py index 60c62f1ac..defad2da5 100644 --- a/dcicutils/file_utils.py +++ b/dcicutils/file_utils.py @@ -104,21 +104,25 @@ def normalize_path(value: Union[str, pathlib.Path], absolute: bool = False, expa return value -def get_file_size(file: str) -> Optional[int]: +def get_file_size(file: str, raise_exception: bool = True) -> Optional[int]: try: return os.path.getsize(file) if isinstance(file, str) else None except Exception: + if raise_exception is True: + raise return None -def get_file_modified_datetime(file: str) -> Optional[datetime]: +def get_file_modified_datetime(file: str, raise_exception: bool = True) -> Optional[datetime]: try: return datetime.fromtimestamp(os.path.getmtime(file)) if isinstance(file, str) else None except Exception: + if raise_exception is True: + raise return None -def are_files_equal(filea: str, fileb: str) -> bool: +def are_files_equal(filea: str, fileb: str, raise_exception: bool = True) -> bool: """ Returns True iff the contents of the two given files are exactly the same. """ @@ -135,10 +139,12 @@ def are_files_equal(filea: str, fileb: str) -> bool: break return True except Exception: + if raise_exception is True: + raise return False -def compute_file_md5(file: str) -> str: +def compute_file_md5(file: str, raise_exception: bool = True) -> str: """ Returns the md5 checksum for the given file. """ @@ -151,10 +157,12 @@ def compute_file_md5(file: str) -> str: md5.update(chunk) return md5.hexdigest() except Exception: + if raise_exception is True: + raise return "" -def compute_file_etag(file: str) -> Optional[str]: +def compute_file_etag(file: str, raise_exception: bool = True) -> Optional[str]: """ Returns the AWS S3 "etag" for the given file; this value is md5-like but not the same as a normal md5. We use this to compare that a file in S3 @@ -164,6 +172,8 @@ def compute_file_etag(file: str) -> Optional[str]: with io.open(file, "rb") as f: return _compute_file_etag(f) except Exception: + if raise_exception is True: + raise return None diff --git a/dcicutils/http_utils.py b/dcicutils/http_utils.py index 9ccf743c5..a82da78bd 100644 --- a/dcicutils/http_utils.py +++ b/dcicutils/http_utils.py @@ -8,24 +8,32 @@ def download(url: str, suffix: Optional[str] = None, binary: bool = True, progress: Optional[Callable] = None) -> Optional[str]: """ - Context manager to ownload the given URL into a temporary file and yields the file - path to it. An optional file suffix may be specified. Defaults to binary file mode; - if this is not desired then pass False as the binary argument. + Context manager to download the given URL into a temporary file and yields the file + path to it. An optional file suffix may be specified for this temporary file name. + Defaults to binary file mode; if not desired then pass False as the binary argument. """ - if not callable(progress): - progress = None with temporary_file(suffix=suffix) as file: - response = requests.get(url, stream=True) - if progress: - nbytes = 0 - nbytes_total = None - if isinstance(content_length := response.headers.get("Content-Length"), str) and content_length.isdigit(): - nbytes_total = int(content_length) - with open(file, "wb" if binary is True else "w") as f: - for chunk in response.iter_content(chunk_size=8192): - if chunk: - f.write(chunk) - if progress: - nbytes += len(chunk) - progress(nbytes, nbytes_total) + download_to(url, file, binary=binary, progress=progress) yield file + + +def download_to(url: str, file: str, binary: bool = True, progress: Optional[Callable] = None) -> None: + """ + Download the given URL into the given file. Defaults to binary + file mode; if not desired then pass False as the binary argument. + """ + if not callable(progress): + progress = None + response = requests.get(url, stream=True) + if progress: + nbytes = 0 + nbytes_total = None + if isinstance(content_length := response.headers.get("Content-Length"), str) and content_length.isdigit(): + nbytes_total = int(content_length) + with open(file, "wb" if binary is True else "w") as f: + for chunk in response.iter_content(chunk_size=8192): + if chunk: + f.write(chunk) + if progress: + nbytes += len(chunk) + progress(nbytes, nbytes_total) diff --git a/pyproject.toml b/pyproject.toml index 2cc997a12..da8d34ecb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.8.4.1b24" # TODO: To become 8.8.5 +version = "8.8.4.1b26" # TODO: To become 8.8.5 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT"