Skip to content

Commit

Permalink
minor updates from pr-305 feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
dmichaels-harvard committed May 6, 2024
1 parent 12456fb commit a5dee76
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 24 deletions.
20 changes: 15 additions & 5 deletions dcicutils/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,21 +104,25 @@ def normalize_path(value: Union[str, pathlib.Path], absolute: bool = False, expa
return value


def get_file_size(file: Union[str, os.PathLike], raise_exception: bool = True) -> Optional[int]:
    """
    Returns the size in bytes of the given file.

    :param file: Path to the file, as a string or an os.PathLike (e.g. pathlib.Path).
        Any other type of value (e.g. None) yields None without raising.
    :param raise_exception: If True (the default) any exception raised while getting
        the size (e.g. the file does not exist) is propagated to the caller;
        otherwise the exception is suppressed and None is returned.
    :returns: The file size in bytes, or None as described above.
    """
    # Path-like objects are accepted in addition to strings so that this agrees with
    # normalize_path in this module, which takes Union[str, pathlib.Path].
    if not isinstance(file, (str, os.PathLike)):
        return None
    try:
        return os.path.getsize(file)
    except Exception:
        if raise_exception is True:
            raise
        return None


def get_file_modified_datetime(file: Union[str, os.PathLike], raise_exception: bool = True) -> Optional[datetime]:
    """
    Returns the last-modified time of the given file as a datetime.

    The value is built with datetime.fromtimestamp without a timezone argument,
    so it is a naive datetime expressed in local time.

    :param file: Path to the file, as a string or an os.PathLike (e.g. pathlib.Path).
        Any other type of value (e.g. None) yields None without raising.
    :param raise_exception: If True (the default) any exception raised while reading
        the modification time (e.g. the file does not exist) is propagated to the
        caller; otherwise the exception is suppressed and None is returned.
    :returns: The modification datetime, or None as described above.
    """
    # Path-like objects are accepted in addition to strings so that this agrees with
    # normalize_path in this module, which takes Union[str, pathlib.Path].
    if not isinstance(file, (str, os.PathLike)):
        return None
    try:
        return datetime.fromtimestamp(os.path.getmtime(file))
    except Exception:
        if raise_exception is True:
            raise
        return None


def are_files_equal(filea: str, fileb: str) -> bool:
def are_files_equal(filea: str, fileb: str, raise_exception: bool = True) -> bool:
"""
Returns True iff the contents of the two given files are exactly the same.
"""
Expand All @@ -135,10 +139,12 @@ def are_files_equal(filea: str, fileb: str) -> bool:
break
return True
except Exception:
if raise_exception is True:
raise
return False


def compute_file_md5(file: str) -> str:
def compute_file_md5(file: str, raise_exception: bool = True) -> str:
"""
Returns the md5 checksum for the given file.
"""
Expand All @@ -151,10 +157,12 @@ def compute_file_md5(file: str) -> str:
md5.update(chunk)
return md5.hexdigest()
except Exception:
if raise_exception is True:
raise
return ""


def compute_file_etag(file: str) -> Optional[str]:
def compute_file_etag(file: str, raise_exception: bool = True) -> Optional[str]:
"""
Returns the AWS S3 "etag" for the given file; this value is md5-like but
not the same as a normal md5. We use this to compare that a file in S3
Expand All @@ -164,6 +172,8 @@ def compute_file_etag(file: str) -> Optional[str]:
with io.open(file, "rb") as f:
return _compute_file_etag(f)
except Exception:
if raise_exception is True:
raise
return None


Expand Down
44 changes: 26 additions & 18 deletions dcicutils/http_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,32 @@
def download(url: str, suffix: Optional[str] = None, binary: bool = True,
             progress: Optional[Callable] = None) -> Optional[str]:
    """
    Context manager which downloads the given URL into a temporary file and yields
    the path to that file. An optional file suffix may be specified for the temporary
    file name. Defaults to binary file mode; if not desired then pass False as the
    binary argument.

    :param url: URL to download.
    :param suffix: Optional suffix for the temporary file name.
    :param binary: True (the default) to write the file in binary mode.
    :param progress: Optional callable passed through to download_to; a value
        which is not callable is ignored.
    """
    if not callable(progress):
        progress = None
    with temporary_file(suffix=suffix) as file:
        # The actual transfer lives in download_to so that it happens exactly once
        # here and can also be used on its own with a caller-chosen destination.
        download_to(url, file, binary=binary, progress=progress)
        yield file


def download_to(url: str, file: str, binary: bool = True, progress: Optional[Callable] = None) -> None:
    """
    Download the given URL into the given file. Defaults to binary
    file mode; if not desired then pass False as the binary argument.

    In text mode the downloaded bytes are decoded using the encoding reported by
    the response (falling back to UTF-8 if there is none or it is unknown), with
    undecodable bytes replaced, and the file is written as UTF-8.

    :param url: URL to fetch; it is streamed in 8192-byte chunks.
    :param file: Path of the file to write; opened for writing, so created or truncated.
    :param binary: True (the default) to write the raw bytes; anything else for text mode.
    :param progress: Optional callable invoked after each chunk is written, as
        progress(nbytes, nbytes_total), where nbytes is the number of bytes
        downloaded so far and nbytes_total is the integer value of the Content-Length
        header, or None if that header is missing or not all digits. A value which
        is not callable is ignored.
    """
    import codecs  # Local import; only needed for the text mode path.

    if not callable(progress):
        progress = None
    text_mode = binary is not True
    # NOTE(review): the HTTP status is not checked, so an error response body is
    # written to the file like any other content; confirm whether callers rely on this.
    # Using the response as a context manager releases the streamed connection
    # even if writing the file or the progress callback raises.
    with requests.get(url, stream=True) as response:
        nbytes = 0
        nbytes_total = None
        if isinstance(content_length := response.headers.get("Content-Length"), str) and content_length.isdigit():
            nbytes_total = int(content_length)
        decoder = None
        if text_mode:
            # iter_content yields bytes, which cannot be written to a text-mode
            # file; decode incrementally so that a multi-byte character split
            # across two chunks is still decoded correctly.
            try:
                decoder_class = codecs.getincrementaldecoder(response.encoding or "utf-8")
            except LookupError:
                decoder_class = codecs.getincrementaldecoder("utf-8")
            decoder = decoder_class(errors="replace")
        with (open(file, "w", encoding="utf-8") if text_mode else open(file, "wb")) as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(decoder.decode(chunk) if decoder is not None else chunk)
                    if progress:
                        # Count raw bytes (not decoded characters) so that the
                        # running total is comparable to Content-Length.
                        nbytes += len(chunk)
                        progress(nbytes, nbytes_total)
            if decoder is not None:
                # Flush anything the decoder is still holding (e.g. a truncated sequence).
                remainder = decoder.decode(b"", final=True)
                if remainder:
                    f.write(remainder)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dcicutils"
version = "8.8.4.1b24" # TODO: To become 8.8.5
version = "8.8.4.1b26" # TODO: To become 8.8.5
description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
license = "MIT"
Expand Down

0 comments on commit a5dee76

Please sign in to comment.