def are_files_equal(filea: str, fileb: str) -> bool:
    """
    Compares the two given files byte-for-byte and returns True iff their
    contents are identical. Both files are opened in binary mode and read
    in lock-step, one fixed-size block at a time, so neither file is ever
    loaded into memory in its entirety.

    Returns False (rather than raising) if either file cannot be opened
    or read, e.g. because it does not exist or is a directory.
    """
    block_size = 4096
    try:
        with open(filea, "rb") as stream_a, open(fileb, "rb") as stream_b:
            while True:
                block_a = stream_a.read(block_size)
                block_b = stream_b.read(block_size)
                if block_a != block_b:
                    # Covers both differing bytes and one file ending before the other.
                    return False
                if not block_a:
                    # Both blocks are empty (they compared equal above): simultaneous EOF.
                    return True
    except Exception:
        # Deliberate best-effort: any failure to open/read means "not equal".
        return False