Skip to content

Commit

Permalink
Fix division-by-zero on import
Browse files Browse the repository at this point in the history
And clean up vestigial code

closes pulp#4777
  • Loading branch information
dralley authored and ggainey committed Dec 14, 2023
1 parent d0bc173 commit 5f6fe99
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 28 deletions.
1 change: 1 addition & 0 deletions CHANGES/4777.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a bug in import/export that could result in a division-by-zero during import.
28 changes: 14 additions & 14 deletions pulpcore/app/serializers/exporter.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
import re
from gettext import gettext as _
import re

from rest_framework import serializers
from rest_framework.validators import UniqueValidator
Expand All @@ -19,6 +19,16 @@
from pulpcore.constants import FS_EXPORT_CHOICES, FS_EXPORT_METHODS


def parse_human_readable_file_size(size: str):
    """
    Convert a human-readable file size such as "10MB" or "1.5 gb" into bytes.

    The input is a number followed by one of the units B, KB, MB, GB or TB
    (case-insensitive, powers of 1024), with optional whitespace between and
    around the two parts.

    Args:
        size: The textual size to parse.

    Returns:
        The size in bytes as an int; fractional bytes are truncated.

    Raises:
        ValueError: If the text is not exactly "<number><unit>", the number
            cannot be parsed or is not finite, or the unit is not recognised.
    """
    # based on https://stackoverflow.com/a/42865957/2002471
    units = {"B": 1, "KB": 2**10, "MB": 2**20, "GB": 2**30, "TB": 2**40}
    # Always put a space in front of the unit; str.split() collapses repeated
    # whitespace, so this is harmless when a separator is already present and
    # also copes with leading/trailing whitespace.
    spaced = re.sub(r"([KMGT]?B)", r" \1", size.upper())
    parts = spaced.split()
    if len(parts) != 2:
        raise ValueError("'{}' is not a valid file size".format(size))
    number, unit = parts
    # Report unknown units as ValueError (not KeyError) so callers only need
    # to handle a single exception type for malformed input.
    if unit not in units:
        raise ValueError("'{}' has an unrecognised unit '{}'".format(size, unit))
    try:
        # float() raises ValueError for non-numeric text and int() raises
        # ValueError for NaN; only the infinity case needs translating.
        return int(float(number) * units[unit])
    except OverflowError as exc:
        raise ValueError("'{}' is not a finite file size".format(size)) from exc


class ExporterSerializer(ModelSerializer):
"""
Base serializer for Exporters.
Expand Down Expand Up @@ -208,23 +218,13 @@ def validate(self, data):
)
return super().validate(data)

@staticmethod
def _parse_size(size):
def validate_chunk_size(self, chunk_size):
try:
# based on https://stackoverflow.com/a/42865957/2002471
units = {"B": 1, "KB": 2**10, "MB": 2**20, "GB": 2**30, "TB": 2**40}
size = size.upper()
if not re.match(r" ", size):
size = re.sub(r"([KMGT]?B)", r" \1", size)
number, unit = [string.strip() for string in size.split()]
return int(float(number) * units[unit])
the_size = parse_human_readable_file_size(chunk_size)
except ValueError:
raise serializers.ValidationError(
_("chunk_size '{}' is not valid (valid units are B/KB/MB/GB/TB)").format(size)
_("chunk_size '{}' is not valid (valid units are B/KB/MB/GB/TB)").format(chunk_size)
)

def validate_chunk_size(self, chunk_size):
the_size = self._parse_size(chunk_size)
if the_size <= 0:
raise serializers.ValidationError(
_("Chunk size {} is not greater than zero!").format(the_size)
Expand Down
21 changes: 8 additions & 13 deletions pulpcore/app/tasks/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,12 +422,10 @@ def pulp_export(exporter_pk, params):
os.remove(pathname)
raise
# compute the hashes
global_hash = hasher()
paths = sorted([str(Path(p)) for p in glob(tarfile_fp + ".*")])
for a_file in paths:
a_hash = compute_file_hash(a_file, hasher=hasher(), cumulative_hash=global_hash)
a_hash = compute_file_hash(a_file, hasher=hasher())
rslts[a_file] = a_hash
tarfile_hash = global_hash.hexdigest()

else:
# write into the file
Expand All @@ -450,23 +448,20 @@ def pulp_export(exporter_pk, params):
# write outputfile/hash info to a file 'next to' the output file(s)
output_file_info_path = tarfile_fp.replace(".tar", "-toc.json")
with open(output_file_info_path, "w") as outfile:
if the_export.validated_chunk_size:
chunk_size = the_export.validated_chunk_size
else:
chunk_size = 0
chunk_toc = {
table_of_contents = {
"meta": {
"chunk_size": chunk_size,
"file": os.path.basename(tarfile_fp),
"global_hash": tarfile_hash,
"checksum_type": checksum_type,
},
"files": {},
}

if the_export.validated_chunk_size:
table_of_contents["meta"]["chunk_size"] = the_export.validated_chunk_size

# Build a toc with just filenames (not the path on the exporter-machine)
for a_path in rslts.keys():
chunk_toc["files"][os.path.basename(a_path)] = rslts[a_path]
json.dump(chunk_toc, outfile)
table_of_contents["files"][os.path.basename(a_path)] = rslts[a_path]
json.dump(table_of_contents, outfile)

# store toc info
toc_hash = compute_file_hash(output_file_info_path)
Expand Down
7 changes: 6 additions & 1 deletion pulpcore/app/tasks/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,17 @@ def __init__(self, toc_path):
raise ValidationError(_("Missing 'files' or 'meta' keys in table-of-contents!"))

toc_dir = os.path.dirname(toc_path)
self.chunk_size = int(self.toc["meta"]["chunk_size"])
# sorting-by-filename is REALLY IMPORTANT here
# keys are of the form <base-export-name>.00..<base-export-name>.NN,
# and must be reassembled IN ORDER
self.chunk_names = sorted(self.toc["files"].keys())
self.chunk_paths = [os.path.join(toc_dir, chunk_name) for chunk_name in self.chunk_names]
self.chunk_size = int(self.toc["meta"].get("chunk_size", 0))
if not self.chunk_size:
assert (
len(self.toc["files"]) == 1
), "chunk_size must exist and be non-zero if more than one chunk exists"
self.chunk_size = os.path.getsize(self.chunk_paths[0])

def __enter__(self):
assert not hasattr(self, "chunks"), "ChunkedFile is not reentrant."
Expand Down

0 comments on commit 5f6fe99

Please sign in to comment.