diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..94a800f --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,5 @@ +# unchecksum - Changelog + +### 0.0.2-alpha +- Add `-c, --compare` to compare two directories +- Raise error if path does not exist diff --git a/README.md b/README.md index 16b58ef..02d0b3d 100644 --- a/README.md +++ b/README.md @@ -13,12 +13,14 @@ Additional parameters: ``` -hs, --hash specify an hash between 'sha1', 'sha224', 'sha256', 'sha384', 'sha512', 'blake2', 'md5' (default is blake2 for speed) -a, --action specify what action to take in case of different hashes ('warn' or 'overwrite') (default 'warn') +-c, --compare compare the given directory against another one with the same directory and file structure/names (specified after this argument) ``` +Note: if using the `--compare` argument the program will *not* calculate hashes, but simply compare two existing sets of hashes against each other. ### Example 1 - Checking the files in-place (same path) First run: ``` -python3 ./unchecksum.py "/disk1/photos" +python3 ./unchecksum.py "/disk1" ``` The program will generate hashes of all your files. @@ -26,24 +28,18 @@ The program will generate hashes of all your files. Assuming you have not moved the files around, you can re-run the same command to check for silent corruption. ### Example 2 - Checking the files after a copy (different path) -First run: +First calculate the hashes on both disks (or directories): ``` -python3 ./unchecksum.py "/disk1/photos" +python3 ./unchecksum.py "/disk1" +python3 ./unchecksum.py "/disk2" ``` +I recommend running these commands in parallel using `tmux` when checking two disks, to cut the calculation time in half. The program will generate hashes of all your files. -You can now either: - -Option 1: Generate and save all hashes in the new location and compare them with a script (better if you want to save the hashes). 
-``` -python3 ./unchecksum.py "/disk2/photos" -``` - -Option 2: Rename the directory containing the hashes to make the program check them (not resorting to this "trick" would be an easy PR but I do not need it - feel free to contribute if you want) +You can now check the two directories against each other: ``` -mv files/disk1 files/disk2 -python3 ./unchecksum.py "/disk2/photos" +python3 ./unchecksum.py "files/disk1" -c "files/disk2" ``` ## FAQ diff --git a/unchecksum.py b/unchecksum.py index 0c9c024..32d5ece 100644 --- a/unchecksum.py +++ b/unchecksum.py @@ -18,6 +18,12 @@ type=str, help="What action to take in case of different hashes ('warn' or 'overwrite') (default 'warn')", ) +parser.add_argument( + "-c", + "--compare", + type=str, + help="Compare the given directory against specified one with the same directory and file structure/names against each other (specified after this argument)", +) args = parser.parse_args() hash_algorithms = { "sha1": hashlib.sha1, @@ -31,7 +37,6 @@ different_hashes = {} - def calculate_hash(filepath: str, hash_algorithm: str): calculated_hash = hash_algorithms[hash_algorithm]() with open(filepath, "rb") as f: @@ -78,24 +83,53 @@ def finder(path: str, hash_algorithm: str, action: str): save_hash(file_hash, filepath, file, hash_algorithm) +def compare_files(filename, hash1, hash2): + if hash1 != hash2: + return f"[Mismatch] {filename}\nHash 1: {hash1}\nHash 2: {hash2}\n" + + path = args.path hash_algorithm = args.hash action = args.action +compare = args.compare + +if not os.path.exists(path): + raise NameError("Specified path does not exist") -if args.action is None: - action = "warn" +if not compare: + if args.action is None: + action = "warn" -if args.hash is None: - hash_algorithm = "blake2" + if args.hash is None: + hash_algorithm = "blake2" -if hash_algorithm not in hash_algorithms: - raise Exception("Unsupported hash algorithm") + if hash_algorithm not in hash_algorithms: + raise Exception("Unsupported hash algorithm") 
-finder(path, hash_algorithm, action) + finder(path, hash_algorithm, action) -if not different_hashes: - print("No hash changes found.") + if not different_hashes: + print("No hash changes found.") + else: + for filepath in different_hashes: + print(f"Filepath: {filepath}\nOld hash: {different_hashes[filepath][0]}\nNew hash: {different_hashes[filepath][1]}") else: - for filepath in different_hashes: - print(f"Filepath: {filepath}\nOld hash: {different_hashes[filepath][0]}\nNew hash: {different_hashes[filepath][1]}") + mismatches = False + if not os.path.exists(compare): + raise NameError("Specified comparison path does not exist") + for root, directories, files in os.walk(path): + for file in files: + filepath = f"{root}/{file}" + print(filepath) + print(filepath.replace(path, compare)) + with open(filepath, "r") as f: + hash1 = f.read() + with open(filepath.replace(path, compare), "r") as f: + hash2 = f.read() + result = compare_files(f"{root}/{file}", hash1, hash2) + if result: + print(result) + mismatches = True + if not mismatches: + print("No hash differences found.")