Skip to content

Commit

Permalink
Compression bug (#14)
Browse files (browse the repository at this point in the history)
* rework compression checks

* bump micro version for bug fix
  • Loading branch information
galtay authored Mar 27, 2019
1 parent f5cfd77 commit f396127
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 7 deletions.
2 changes: 1 addition & 1 deletion qwikidata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
"""Metadata for this package."""

__package_name__ = "qwikidata"
-__version__ = "0.1.1"
+__version__ = "0.1.2"
15 changes: 9 additions & 6 deletions qwikidata/json_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,12 @@ def __init__(self, filename: str) -> None:
if filename.endswith(".json"):
self.basename, _ = os.path.splitext(filename)
self.compression = None
-elif filename.endswith((".json.bz2", ".json.gz")):
+elif filename.endswith(".json.bz2"):
self.basename, _ = os.path.splitext(os.path.splitext(filename)[0])
-self.compression = os.path.splitext(filename)[1]
+self.compression = "bz2"
+elif filename.endswith(".json.gz"):
+self.basename, _ = os.path.splitext(os.path.splitext(filename)[0])
+self.compression = "gz"
else:
raise ValueError('filename must end with ".json.bz2" or ".json.gz" or ".json"')

Expand All @@ -48,10 +51,10 @@ def _open_dump_file(self) -> Iterator[IO[Any]]:
It is important to open the file in binary mode even if it is not compressed. This allows us
to handle decoding in one place.
"""
-if self.compression == ".bz2":
+if self.compression == "bz2":
with bz2.open(self.filename, mode="rb") as fp:
yield fp
-elif self.compression == ".gz":
+elif self.compression == "gz":
with gzip.open(self.filename, mode="rb") as fp:
yield fp
else:
Expand Down Expand Up @@ -88,11 +91,11 @@ def _write_chunk(
elif out_format == "jsonl":
fp.write("\n".join(out_lines))

-if self.compression == ".bz2":
+if self.compression == "bz2":
args = ["bzip2", out_fname]
subprocess.check_output(args)
out_fname = f"{out_fname}.bz2"
-elif self.compression == ".gz":
+elif self.compression == "gz":
args = ["gzip", out_fname]
subprocess.check_output(args)
out_fname = f"{out_fname}.gz"
Expand Down

0 comments on commit f396127

Please sign in to comment.