From 6e9857b3e046d820745f3e1d433d02e4e1d9049a Mon Sep 17 00:00:00 2001 From: "William D. Jones" Date: Thu, 15 Aug 2024 20:42:40 -0400 Subject: [PATCH 1/3] Remove 0-added,0-removed modified entries from JSON output. --- src/borg/archiver.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 68cf0e2f1b..a387e3c332 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -1122,7 +1122,16 @@ def do_diff(self, args, repository, manifest, key, archive): """Diff contents of two archives""" def print_json_output(diff, path): - print(json.dumps({"path": path, "changes": [j for j, str in diff]}, sort_keys=True, cls=BorgJsonEncoder)) + def actual_change(j): + if j["type"] == "modified": + # It's useful to show 0 added and 0 removed for text output + # but for JSON this is essentially noise. Additionally, the + # JSON key is "changes", and this is not actually a change. + return not (j["added"] == 0 and j["removed"] == 0) + else: + return True + + print(json.dumps({"path": path, "changes": [j for j, str in diff if actual_change(j)]}, sort_keys=True, cls=BorgJsonEncoder)) def print_text_output(diff, path): print("{:<19} {}".format(' '.join([str for j, str in diff]), path)) From f7673e3f066e8b8330084e079942d63cdbbbfb1a Mon Sep 17 00:00:00 2001 From: "William D. Jones" Date: Thu, 15 Aug 2024 22:26:03 -0400 Subject: [PATCH 2/3] Add test to make sure 0-added,0-removed modified changes aren't in JSON output. --- src/borg/testsuite/archiver.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 33d72819eb..643b79ae94 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -23,6 +23,7 @@ from hashlib import sha256 from io import BytesIO, StringIO from unittest.mock import patch +from pathlib import Path import pytest @@ -4472,6 +4473,7 @@ def test_basic_functionality(self): self.create_regular_file('file_removed', size=256) self.create_regular_file('file_removed2', size=512) self.create_regular_file('file_replaced', size=1024) + self.create_regular_file('file_touched', size=128) os.mkdir('input/dir_replaced_with_file') os.chmod('input/dir_replaced_with_file', stat.S_IFDIR | 0o755) os.mkdir('input/dir_removed') @@ -4500,6 +4502,7 @@ def test_basic_functionality(self): self.create_regular_file('file_replaced', contents=b'0' * 4096) os.unlink('input/file_removed') os.unlink('input/file_removed2') + Path('input/file_touched').touch() os.rmdir('input/dir_replaced_with_file') self.create_regular_file('dir_replaced_with_file', size=8192) os.chmod('input/dir_replaced_with_file', stat.S_IFREG | 0o755) @@ -4562,6 +4565,13 @@ def do_asserts(output, can_compare_ids, content_only=False): change = '0 B' if can_compare_ids else '{:<19}'.format('modified') self.assert_line_exists(lines, f"{change}.*input/empty") + # Show a 0 byte change for a file whose contents weren't modified + # for text output. + if content_only: + assert "input/file_touched" not in output + else: + self.assert_line_exists(lines, f"{change}.*input/file_touched") + if are_hardlinks_supported(): self.assert_line_exists(lines, f"{change}.*input/hardlink_contents_changed") if are_symlinks_supported(): @@ -4610,6 +4620,16 @@ def get_changes(filename, data): # File unchanged assert not any(get_changes('input/file_unchanged', joutput)) + # Do NOT show a 0 byte change for a file whose contents weren't + # modified for JSON output. + unexpected = {'type': 'modified', 'added': 0, 'removed': 0} + assert unexpected not in get_changes('input/file_touched', joutput) + if not content_only: + assert {"ctime", "mtime"}.issubset({c["type"] for c in get_changes('input/file_touched', joutput)}) + else: + # And if we're doing content-only, don't show the file at all. + assert not any(get_changes('input/file_touched', joutput)) + # Directory replaced with a regular file if 'BORG_TESTS_IGNORE_MODES' not in os.environ and not content_only: assert {'type': 'mode', 'old_mode': 'drwxr-xr-x', 'new_mode': '-rwxr-xr-x'} in \ From 938e144f8b9674f00b3c081a727fcf12b2f2e3a8 Mon Sep 17 00:00:00 2001 From: "William D. Jones" Date: Sun, 18 Aug 2024 22:37:38 -0400 Subject: [PATCH 3/3] Ensure that 0B changes are hidden from text diffs too. --- src/borg/archiver.py | 22 ++++++++++++---------- src/borg/testsuite/__init__.py | 3 +++ src/borg/testsuite/archiver.py | 16 +++++++++------- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index a387e3c332..f8dc617520 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -1121,20 +1121,22 @@ def item_to_tarinfo(item, original_path): def do_diff(self, args, repository, manifest, key, archive): """Diff contents of two archives""" - def print_json_output(diff, path): - def actual_change(j): - if j["type"] == "modified": - # It's useful to show 0 added and 0 removed for text output - # but for JSON this is essentially noise. Additionally, the - # JSON key is "changes", and this is not actually a change. - return not (j["added"] == 0 and j["removed"] == 0) - else: - return True + def actual_change(j): + if j["type"] == "modified": + # Added/removed keys will not exist if chunker params differ + # between the two archives. Err on the side of caution and assume + # a real modification in this case (short-circuiting retrieving + # non-existent keys). + return not {"added", "removed"} <= j.keys() or not (j["added"] == 0 and j["removed"] == 0) + else: + # All other change types are indeed changes. + return True + def print_json_output(diff, path): print(json.dumps({"path": path, "changes": [j for j, str in diff if actual_change(j)]}, sort_keys=True, cls=BorgJsonEncoder)) def print_text_output(diff, path): - print("{:<19} {}".format(' '.join([str for j, str in diff]), path)) + print("{:<19} {}".format(' '.join([str for j, str in diff if actual_change(j)]), path)) print_output = print_json_output if args.json_lines else print_text_output diff --git a/src/borg/testsuite/__init__.py b/src/borg/testsuite/__init__.py index 7a3aaef37d..1955351f87 100644 --- a/src/borg/testsuite/__init__.py +++ b/src/borg/testsuite/__init__.py @@ -191,6 +191,9 @@ def assert_dirs_equal(self, dir1, dir2, **kwargs): def assert_line_exists(self, lines, expected_regexpr): assert any(re.search(expected_regexpr, line) for line in lines), f"no match for {expected_regexpr} in {lines}" + def assert_line_not_exists(self, lines, expected_regexpr): + assert not any(re.search(expected_regexpr, line) for line in lines), f"unexpected match for {expected_regexpr} in {lines}" + def _assert_dirs_equal_cmp(self, diff, ignore_flags=False, ignore_xattrs=False, ignore_ns=False): self.assert_equal(diff.left_only, []) self.assert_equal(diff.right_only, []) diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 643b79ae94..34dca66825 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -4565,12 +4565,14 @@ def do_asserts(output, can_compare_ids, content_only=False): change = '0 B' if can_compare_ids else '{:<19}'.format('modified') self.assert_line_exists(lines, f"{change}.*input/empty") - # Show a 0 byte change for a file whose contents weren't modified - # for text output. - if content_only: - assert "input/file_touched" not in output + # Do not show a 0 byte change for a file whose contents weren't + # modified. + self.assert_line_not_exists(lines, '0 B.*input/file_touched') + if not content_only: + self.assert_line_exists(lines, "[cm]time:.*input/file_touched") else: - self.assert_line_exists(lines, f"{change}.*input/file_touched") + # And if we're doing content-only, don't show the file at all. + assert "input/file_touched" not in output if are_hardlinks_supported(): self.assert_line_exists(lines, f"{change}.*input/hardlink_contents_changed") @@ -4620,8 +4622,8 @@ def get_changes(filename, data): # File unchanged assert not any(get_changes('input/file_unchanged', joutput)) - # Do NOT show a 0 byte change for a file whose contents weren't - # modified for JSON output. + # Do not show a 0 byte change for a file whose contents weren't + # modified. unexpected = {'type': 'modified', 'added': 0, 'removed': 0} assert unexpected not in get_changes('input/file_touched', joutput) if not content_only: