Skip to content

Commit

Permalink
Add s3a url support for S3 (#229)
Browse files Browse the repository at this point in the history
  • Loading branch information
mpenkov authored and menshikh-iv committed Sep 15, 2018
1 parent ec58647 commit d5c915a
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 7 deletions.
2 changes: 2 additions & 0 deletions smart_open/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
BINARY_NEWLINE = b'\n'
DEFAULT_BUFFER_SIZE = 128 * 1024

SUPPORTED_SCHEMES = ("s3", "s3n", 's3u', "s3a")


def _range_string(start, stop=None):
#
Expand Down
21 changes: 15 additions & 6 deletions smart_open/smart_open_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ def _open_binary_stream(uri, mode, **kw):
# compression, if any, is determined by the filename extension (.gz, .bz2)
fobj = io.open(parsed_uri.uri_path, mode)
return fobj, filename
elif parsed_uri.scheme in ("s3", "s3n", 's3u'):
elif parsed_uri.scheme in smart_open_s3.SUPPORTED_SCHEMES:
return _s3_open_uri(parsed_uri, mode, **kw), filename
elif parsed_uri.scheme in ("hdfs", ):
if mode == 'rb':
Expand Down Expand Up @@ -404,10 +404,19 @@ def _parse_uri(uri_as_string):
"""
Parse the given URI from a string.
Supported URI schemes are "file", "s3", "s3n", "s3u" and "hdfs".
Supported URI schemes are:
* s3 and s3n are treated the same way.
* s3u is s3 but without SSL.
* file
* hdfs
* http
* https
* s3
* s3a
* s3n
* s3u
* webhdfs
s3, s3a and s3n are treated the same way. s3u is s3 but without SSL.
Valid URI examples::
Expand Down Expand Up @@ -435,7 +444,7 @@ def _parse_uri(uri_as_string):
return _parse_uri_hdfs(parsed_uri)
elif parsed_uri.scheme == "webhdfs":
return _parse_uri_webhdfs(parsed_uri)
elif parsed_uri.scheme in ("s3", "s3n", "s3u"):
elif parsed_uri.scheme in smart_open_s3.SUPPORTED_SCHEMES:
return _parse_uri_s3x(parsed_uri)
elif parsed_uri.scheme in ('file', '', None):
return _parse_uri_file(parsed_uri)
Expand Down Expand Up @@ -469,7 +478,7 @@ def _parse_uri_webhdfs(parsed_uri):


def _parse_uri_s3x(parsed_uri):
assert parsed_uri.scheme in ("s3", "s3n", "s3u")
assert parsed_uri.scheme in smart_open_s3.SUPPORTED_SCHEMES

port = 443
host = boto.config.get('s3', 'host', 's3.amazonaws.com')
Expand Down
2 changes: 1 addition & 1 deletion smart_open/tests/test_smart_open.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class ParseUriTest(unittest.TestCase):
def test_scheme(self):
"""Do URIs schemes parse correctly?"""
# supported schemes
for scheme in ("s3", "s3n", "hdfs", "file", "http", "https"):
for scheme in ("s3", "s3a", "s3n", "hdfs", "file", "http", "https"):
parsed_uri = smart_open_lib._parse_uri(scheme + "://mybucket/mykey")
self.assertEqual(parsed_uri.scheme, scheme)

Expand Down

0 comments on commit d5c915a

Please sign in to comment.