diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f7ccc098a..507f919d1 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -21,6 +21,7 @@ Change Log - Added are_files_equal, create_random_file to file_utils, compute_file_md5, compute_file_etag, normalize_path, get_file_size, get_file_modified_datetime to file_utils. - Minor extra sanity check to search_for_file in file_utils. + - Added deterministic ordering to paths returned by search_for_file in file_utils. - Added create_temporary_file_name and remove_temporary_file tmpfile_utils. - Minor fix to misc_utils.create_dict (do not create property only if its value is None). diff --git a/dcicutils/file_utils.py b/dcicutils/file_utils.py index 8afdbc685..552d314ac 100644 --- a/dcicutils/file_utils.py +++ b/dcicutils/file_utils.py @@ -16,7 +16,8 @@ def search_for_file(file: str, location: Union[str, Optional[List[str]]] = None, recursive: bool = False, - single: bool = False) -> Union[List[str], Optional[str]]: + single: bool = False, + order: bool = True) -> Union[List[str], Optional[str]]: """ Searches for the existence of the given file name, first directly in the given directory or list of directories, if specified, and if not then just in the current (working) directory; if the @@ -25,6 +26,11 @@ def search_for_file(file: str, first file which is found is returns (as a string), or None if none; if the single flag is False, then all matched files are returned in a list, or and empty list if none. """ + def order_by_fewest_number_of_paths_and_then_alphabetically(paths: List[str]) -> List[str]: + def order_by(path: str): + return len(path.split(os.path.sep)), path + return sorted(paths, key=order_by, reverse=True) + if not (file and isinstance(file, (str, pathlib.PosixPath))): return None if single is True else [] if os.path.isabs(file): @@ -74,9 +80,12 @@ def search_for_file(file: str, return file_found if file_found not in files_found: files_found.append(file_found) - if files_found: - return files_found[0] if single is True else files_found - return None if single is True else [] + if single is True: + return files_found[0] if files_found else None + elif order is True: + return order_by_fewest_number_of_paths_and_then_alphabetically(files_found) + else: + return files_found def normalize_path(value: Union[str, pathlib.Path], absolute: bool = False, expand_home: Optional[bool] = None) -> str: diff --git a/pyproject.toml b/pyproject.toml index 77bd9c31e..66fed4d25 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.8.4.1b27" # TODO: To become 8.8.5 +version = "8.8.4.1b28" # TODO: To become 8.8.5 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT"