Skip to content

Commit

Permalink
[build] add support for windows and change executable to unzip_http #17
Browse files Browse the repository at this point in the history
For the entrypoint to work, "unzip-http" needed to be given a module name
"unzip_http".
  • Loading branch information
anjakefala committed Jul 7, 2024
1 parent 9b48e93 commit a743cb6
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 125 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@ jobs:
run: |
python setup.py develop
- name: Run basic test
run: unzip-http http://psa.download.navigation.com/automotive/PSA/RT6-SMEGx/M49RG20-Q0420-2001.ZIP DATA/CURR_VERS_NAVI.TXT
run: unzip_http.py http://psa.download.navigation.com/automotive/PSA/RT6-SMEGx/M49RG20-Q0420-2001.ZIP DATA/CURR_VERS_NAVI.TXT
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ def requirements():
url="https://github.com/saulpw/unzip-http",
python_requires=">=3.8",
py_modules=["unzip_http"],
scripts=["unzip-http"],
scripts=["unzip_http.py"],
entry_points={
"console_scripts": ["unzip_http=unzip_http:main"],
},
install_requires=requirements(),
)
123 changes: 0 additions & 123 deletions unzip-http

This file was deleted.

122 changes: 122 additions & 0 deletions unzip_http.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/usr/bin/env python3

# Copyright (c) 2022 Saul Pwanson
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
Expand All @@ -18,12 +20,38 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""
usage: unzip-http [-h] [-l] [-f] [-o] url [files ...]
Extract individual files from .zip files over http without downloading the
entire archive. HTTP server must send `Accept-Ranges: bytes` and
`Content-Length` in headers.
positional arguments:
url URL of the remote zip file
files Files to extract. If no filenames given, displays .zip
contents (filenames and sizes). Each filename can be a
wildcard glob.
options:
-h, --help show this help message and exit
-l, --list List files in the remote zip file
-f, --full-filepaths Recreate folder structure from zip file when extracting
(instead of extracting the files to the current
directory)
-o, --stdout Write files to stdout (if multiple files: concatenate
them to stdout, in zipfile order)
"""

import sys
import os
import io
import math
import time
import zlib
import struct
import fnmatch
import argparse
import pathlib
import urllib.parse

Expand Down Expand Up @@ -263,3 +291,97 @@ def read(self, n):
self._buffer = self._buffer[n:]

return ret


### script start

class StreamProgress:
def __init__(self, fp, name='', total=0):
self.name = name
self.fp = fp
self.total = total
self.start_time = time.time()
self.last_update = 0
self.amtread = 0

def read(self, n):
r = self.fp.read(n)
self.amtread += len(r)
now = time.time()
if now - self.last_update > 0.1:
self.last_update = now

elapsed_s = now - self.start_time
sys.stderr.write(f'\r{elapsed_s:.0f}s {self.amtread/10**6:.02f}/{self.total/10**6:.02f}MB ({self.amtread/10**6/elapsed_s:.02f} MB/s) {self.name}')

if not r:
sys.stderr.write('\n')

return r


def list_files(rzf):
def safelog(x):
return 1 if x == 0 else math.ceil(math.log10(x))

digits_compr = max(safelog(f.compress_size) for f in rzf.infolist())
digits_plain = max(safelog(f.file_size ) for f in rzf.infolist())
fmtstr = f'%{digits_compr}d -> %{digits_plain}d\t%s'
for f in rzf.infolist():
print(fmtstr % (f.compress_size, f.file_size, f.filename), file=sys.stderr)


def extract_one(outfile, rzf, f, ofname):
print(f'Extracting {f.filename} to {ofname}...', file=sys.stderr)

fp = StreamProgress(rzf.open(f), name=f.filename, total=f.compress_size)
while r := fp.read(2**18):
outfile.write(r)


def download_file(f, rzf, args):
if not any(fnmatch.fnmatch(f.filename, g) for g in args.files):
return

if args.stdout:
extract_one(sys.stdout.buffer, rzf, f, "stdout")
else:
path = pathlib.Path(f.filename)
if args.full_filepaths:
path.parent.mkdir(parents=True, exist_ok=True)
else:
path = path.name

with open(str(path), 'wb') as of:
extract_one(of, rzf, f, str(path))


def run(args):
rzf = RemoteZipFile(args.url[0])
if args.list or len(args.files) == 0:
list_files(rzf)
else:
for f in rzf.infolist():
download_file(f, rzf, args)

def main():
parser = argparse.ArgumentParser(prog='unzip-http', \
description="Extract individual files from .zip files over http without downloading the entire archive. HTTP server must send `Accept-Ranges: bytes` and `Content-Length` in headers.")

parser.add_argument('-l', '--list', action='store_true', default=False,
help="List files in the remote zip file")
parser.add_argument('-f', '--full-filepaths', action='store_true', default=False,
help="Recreate folder structure from zip file when extracting (instead of extracting the files to the current directory)")
parser.add_argument('-o', '--stdout', action='store_true', default=False,
help="Write files to stdout (if multiple files: concatenate them to stdout, in zipfile order)")

parser.add_argument("url", nargs=1, help="URL of the remote zip file")
parser.add_argument("files", nargs='*', help="Files to extract. If no filenames given, displays .zip contents (filenames and sizes). Each filename can be a wildcard glob.")

args = parser.parse_args()
run(args)



if __name__ == '__main__':
main()

0 comments on commit a743cb6

Please sign in to comment.