Skip to content

Commit

Permalink
[http- unzip_http] handle errors in retrieving URLs
Browse files Browse the repository at this point in the history
Make error output user-friendly instead of showing a traceback
for errors like http.client.RemoteDisconnected ("Remote end closed
connection without response") and ConnectionError ("[Errno 104]
Connection reset by peer"). Also handle urllib3.exceptions.MaxRetryError
in get_range() on a closed connection, and HTTP status codes other
than success in the zip file HEAD request.
  • Loading branch information
midichef committed Dec 26, 2024
1 parent efbe84e commit ce2e971
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
3 changes: 3 additions & 0 deletions visidata/loaders/http.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import re
import http.client

from visidata import Path, RepeatFile, vd, VisiData
from visidata.loaders.tsv import splitter
Expand Down Expand Up @@ -55,6 +56,8 @@ def openurl_http(vd, path, filetype=None):
vd.fail(f'cannot open URL: HTTP Error {e.code}: {e.reason}')
except urllib.error.URLError as e:
vd.fail(f'cannot open URL: {e.reason}')
except (http.client.HTTPException, ConnectionError) as e:
vd.fail(f'cannot open URL: {e}')

filetype = filetype or vd.guessFiletype(path, response, funcprefix='guessurl_').get('filetype') # try guessing by url
filetype = filetype or vd.guessFiletype(path, funcprefix='guess_').get('filetype') # try guessing by contents
Expand Down
14 changes: 12 additions & 2 deletions visidata/loaders/unzip_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,13 @@ def namelist(self):
return list(r.filename for r in self.infoiter())

def infoiter(self):
resp = self.http.request('HEAD', self.url)
urllib3 = vd.importExternal('urllib3')
try:
resp = self.http.request('HEAD', self.url)
except urllib3.exceptions.HTTPError as e:
vd.fail(f'cannot open URL: HTTP Error {e}')
if not (200 <= resp.status <= 299):
vd.fail(f'cannot open URL: status code {resp.status}')
r = resp.headers.get('Accept-Ranges', '')
if r != 'bytes':
hostname = urllib.parse.urlparse(self.url).netloc
Expand Down Expand Up @@ -231,7 +237,11 @@ def extractall(self, path=None, members=None, pwd=None):
self.extract(fn, path, pwd=pwd)

def get_range(self, start, n):
return self.http.request('GET', self.url, headers={'Range': f'bytes={start}-{start+n-1}'}, preload_content=False)
urllib3 = vd.importExternal('urllib3')
try:
return self.http.request('GET', self.url, headers={'Range': f'bytes={start}-{start+n-1}'}, preload_content=False)
except urllib3.exceptions.HTTPError as e:
vd.fail(f'cannot open range for URL: HTTP Error {e}')

def matching_files(self, *globs):
for f in self.files.values():
Expand Down

0 comments on commit ce2e971

Please sign in to comment.