From f386b2ae74bf18443836936941ae8bd1bfd40903 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mario=20=C5=A0a=C5=A1ko?= Date: Thu, 15 Feb 2024 17:27:37 +0100 Subject: [PATCH] Add `HfFileSystem.url` method (#2027) * Add url method * Apply suggestions from code review Co-authored-by: Lucain * Apply suggestions from code review Co-authored-by: Lucain * Fix --------- Co-authored-by: Lucain --- src/huggingface_hub/hf_file_system.py | 20 ++++++++++++++++++++ tests/test_hf_file_system.py | 10 ++++++++++ 2 files changed, 30 insertions(+) diff --git a/src/huggingface_hub/hf_file_system.py b/src/huggingface_hub/hf_file_system.py index 1fec55ea02..08761eb528 100644 --- a/src/huggingface_hub/hf_file_system.py +++ b/src/huggingface_hub/hf_file_system.py @@ -577,6 +577,20 @@ def isfile(self, path): except: # noqa: E722 return False + def url(self, path: str) -> str: + """Get the HTTP URL of the given path""" + resolved_path = self.resolve_path(path) + url = hf_hub_url( + resolved_path.repo_id, + resolved_path.path_in_repo, + repo_type=resolved_path.repo_type, + revision=resolved_path.revision, + endpoint=self.endpoint, + ) + if self.isdir(path): + url = url.replace("/resolve/", "/tree/", 1) + return url + @property def transaction(self): """A context within which files are committed together upon exit @@ -653,6 +667,9 @@ def _upload_chunk(self, final: bool = False) -> None: path=self.resolved_path.unresolve(), ) + def url(self) -> str: + return self.fs.url(self.path) + class HfFileSystemStreamFile(fsspec.spec.AbstractBufferedFile): def __init__( @@ -740,6 +757,9 @@ def read(self, length: int = -1): self.loc += len(out) return out + def url(self) -> str: + return self.fs.url(self.path) + def __del__(self): if not hasattr(self, "resolved_path"): # Means that the constructor failed. Nothing to do. diff --git a/tests/test_hf_file_system.py b/tests/test_hf_file_system.py index 02cf913515..af9bf3b94f 100644 --- a/tests/test_hf_file_system.py +++ b/tests/test_hf_file_system.py @@ -131,6 +131,16 @@ def test_glob(self): ) self.assertIsNotNone(files[keys[0]]["last_commit"]) + def test_url(self): + self.assertEqual( + self.hffs.url(self.hf_path + "/data/text_data.txt"), + f"{ENDPOINT_STAGING}/datasets/{self.repo_id}/resolve/main/data/text_data.txt", + ) + self.assertEqual( + self.hffs.url(self.hf_path + "/data"), + f"{ENDPOINT_STAGING}/datasets/{self.repo_id}/tree/main/data", + ) + def test_file_type(self): self.assertTrue( self.hffs.isdir(self.hf_path + "/data") and not self.hffs.isdir(self.hf_path + "/.gitattributes")