Skip to content

Commit

Permalink
Add HfFileSystem.url method (#2027)
Browse files Browse the repository at this point in the history
* Add url method

* Apply suggestions from code review

Co-authored-by: Lucain <lucainp@gmail.com>

* Apply suggestions from code review

Co-authored-by: Lucain <lucainp@gmail.com>

* Fix

---------

Co-authored-by: Lucain <lucainp@gmail.com>
  • Loading branch information
mariosasko and Wauplin authored Feb 15, 2024
1 parent 434c60c commit f386b2a
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 0 deletions.
20 changes: 20 additions & 0 deletions src/huggingface_hub/hf_file_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,20 @@ def isfile(self, path):
except: # noqa: E722
return False

def url(self, path: str) -> str:
    """Return the HTTP URL that corresponds to ``path``.

    The path is resolved with ``self.resolve_path`` and the resolved repo id,
    path in repo, repo type and revision are handed to ``hf_hub_url`` together
    with ``self.endpoint``. When ``self.isdir(path)`` is true, the first
    ``/resolve/`` segment of that URL is swapped for ``/tree/``.

    Args:
        path: Path to build the URL for.

    Returns:
        The URL as a string.
    """
    target = self.resolve_path(path)
    file_url = hf_hub_url(
        target.repo_id,
        target.path_in_repo,
        repo_type=target.repo_type,
        revision=target.revision,
        endpoint=self.endpoint,
    )
    if not self.isdir(path):
        return file_url
    # Only the first occurrence is replaced, so a later "/resolve/" that is
    # part of the path itself is left untouched.
    return file_url.replace("/resolve/", "/tree/", 1)

@property
def transaction(self):
"""A context within which files are committed together upon exit
Expand Down Expand Up @@ -653,6 +667,9 @@ def _upload_chunk(self, final: bool = False) -> None:
path=self.resolved_path.unresolve(),
)

def url(self) -> str:
    """Return the URL of this file.

    Delegates to the ``url`` method of the owning filesystem (``self.fs``),
    passing this file's ``self.path``.
    """
    filesystem = self.fs
    return filesystem.url(self.path)


class HfFileSystemStreamFile(fsspec.spec.AbstractBufferedFile):
def __init__(
Expand Down Expand Up @@ -740,6 +757,9 @@ def read(self, length: int = -1):
self.loc += len(out)
return out

def url(self) -> str:
    """Return the URL of this file.

    The lookup is forwarded to ``self.fs.url`` with ``self.path`` as the
    argument; whatever that call returns is returned unchanged.
    """
    resolve_url = self.fs.url
    return resolve_url(self.path)

def __del__(self):
if not hasattr(self, "resolved_path"):
# Means that the constructor failed. Nothing to do.
Expand Down
10 changes: 10 additions & 0 deletions tests/test_hf_file_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,16 @@ def test_glob(self):
)
self.assertIsNotNone(files[keys[0]]["last_commit"])

def test_url(self):
    """Check ``hffs.url`` for a file path and for a directory path.

    A file is expected to map to a ``/resolve/`` URL and a directory to a
    ``/tree/`` URL, both on the ``main`` revision of the test dataset repo.
    """
    # File path: expect the "/resolve/" form.
    file_url = self.hffs.url(self.hf_path + "/data/text_data.txt")
    expected_file_url = f"{ENDPOINT_STAGING}/datasets/{self.repo_id}/resolve/main/data/text_data.txt"
    self.assertEqual(file_url, expected_file_url)

    # Directory path: expect the "/tree/" form.
    dir_url = self.hffs.url(self.hf_path + "/data")
    expected_dir_url = f"{ENDPOINT_STAGING}/datasets/{self.repo_id}/tree/main/data"
    self.assertEqual(dir_url, expected_dir_url)

def test_file_type(self):
self.assertTrue(
self.hffs.isdir(self.hf_path + "/data") and not self.hffs.isdir(self.hf_path + "/.gitattributes")
Expand Down

0 comments on commit f386b2a

Please sign in to comment.