From f396127edf1c1b2686ad23617f58dca934239eb2 Mon Sep 17 00:00:00 2001 From: Gabriel Altay Date: Wed, 27 Mar 2019 18:16:51 -0400 Subject: [PATCH] Compression bug (#14) * rework compression checks * bump micro version for bug fix --- qwikidata/__init__.py | 2 +- qwikidata/json_dump.py | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/qwikidata/__init__.py b/qwikidata/__init__.py index 0febb8a..3c05636 100644 --- a/qwikidata/__init__.py +++ b/qwikidata/__init__.py @@ -2,4 +2,4 @@ """Metadata for this package.""" __package_name__ = "qwikidata" -__version__ = "0.1.1" +__version__ = "0.1.2" diff --git a/qwikidata/json_dump.py b/qwikidata/json_dump.py index e7bca80..ff21f5d 100644 --- a/qwikidata/json_dump.py +++ b/qwikidata/json_dump.py @@ -32,9 +32,12 @@ def __init__(self, filename: str) -> None: if filename.endswith(".json"): self.basename, _ = os.path.splitext(filename) self.compression = None - elif filename.endswith((".json.bz2", ".json.gz")): + elif filename.endswith(".json.bz2"): self.basename, _ = os.path.splitext(os.path.splitext(filename)[0]) - self.compression = os.path.splitext(filename)[1] + self.compression = "bz2" + elif filename.endswith(".json.gz"): + self.basename, _ = os.path.splitext(os.path.splitext(filename)[0]) + self.compression = "gz" else: raise ValueError('filename must end with ".json.bz2" or ".json.gz" or ".json"') @@ -48,10 +51,10 @@ def _open_dump_file(self) -> Iterator[IO[Any]]: It is important to open the file in binary mode even if it is not compressed. This allows us to handle decoding in one place. """ - if self.compression == ".bz2": + if self.compression == "bz2": with bz2.open(self.filename, mode="rb") as fp: yield fp - elif self.compression == ".gz": + elif self.compression == "gz": with gzip.open(self.filename, mode="rb") as fp: yield fp else: @@ -88,11 +91,11 @@ def _write_chunk( elif out_format == "jsonl": fp.write("\n".join(out_lines)) - if self.compression == ".bz2": + if self.compression == "bz2": args = ["bzip2", out_fname] subprocess.check_output(args) out_fname = f"{out_fname}.bz2" - elif self.compression == ".gz": + elif self.compression == "gz": args = ["gzip", out_fname] subprocess.check_output(args) out_fname = f"{out_fname}.gz"