diff --git a/README.md b/README.md index 4d0e28d..c4ebd6b 100644 --- a/README.md +++ b/README.md @@ -81,20 +81,58 @@ https://xoflib.readthedocs.io/ ## Rough Benchmark +We find that `xoflib` performs comparably to `hashlib` and is faster than `pycryptodome`. + +`xoflib` has the additional memory benefit that reading `c` bytes from our XOF `n` times only needs `c` bytes of memory for each call, whereas `hashlib` requires the potentially colossal amount of `n * c` bytes of memory which are then iterated over. + +We include two timings for `hashlib` -- one naive version where `n * c` bytes are requested and then iterated over by slicing, and a second which uses a wrapper by David Buchanan +[from this comment](https://github.com/pyca/cryptography/issues/9185#issuecomment-1868518432) which helps with the API but has the same memory usage issues. + +All times are derived by timing the computation of `c_0 ^ c_1 ^ ... c_(n-1)` for `n` chunks of `c` bytes: + +```py +def benchmark_xof(shake, absorb, c, n): + xof = shake(absorb).finalize() + res = bytes([0] * c) + for _ in range(n): + chunk = xof.read(c) + res = xor_bytes(res, chunk) + return res +``` + ``` -10_000 calls (read(1, 500)) with xoflib: 0.014404773712158203 -10_000 calls (read(1, 500)) with hashlib: 0.02388787269592285 -10_000 calls (read(1, 500)) with pycryptodome: 0.028993844985961914 --------------------------------------------------------------------------------- -1_000_000 single byte reads with xoflib: 0.16383790969848633 -1_000_000 single byte reads pycryptodome: 1.172316312789917 -100_000 block reads with xoflib: 0.6025588512420654 -100_000 block reads pycryptodome: 1.6401760578155518 --------------------------------------------------------------------------------- -10_000 calls (read(1, 5000)) with xoflib Shake128: 0.07348895072937012 -10_000 calls (read(1, 5000)) with xoflib Shake256: 0.08775138854980469 -10_000 calls (read(1, 5000)) with xoflib TurboShake128: 0.04633498191833496 -10_000 calls 
(read(1, 5000)) with xoflib TurboShake256: 0.056485891342163086 +================================================================================ + Benchmarking Shake256: +================================================================================ +Requesting 1 bytes from XOF 10000 times +xoflib: 0.69s +hashlib (single call): 0.65s +hashlib (streaming): 0.82s +pycryptodome: 1.82s + +Requesting 100 bytes from XOF 10000 times +xoflib: 6.65s +hashlib (single call): 6.57s +hashlib (streaming): 6.98s +pycryptodome: 7.83s + +Requesting 1000 bytes from XOF 1000 times +xoflib: 6.05s +hashlib (single call): 5.90s +hashlib (streaming): 6.15s +pycryptodome: 6.15s + +Requesting 10000 bytes from XOF 1000 times +xoflib: 5.82s +hashlib (single call): 5.77s +hashlib (streaming): 6.37s +pycryptodome: 5.85s + +Requesting 32 bytes from XOF 100000 times +xoflib: 2.71s +hashlib (single call): 2.63s +hashlib (streaming): 2.89s +pycryptodome: 3.83s ``` For more information, see the file [`benchmarks/benchmark_xof.py`](benchmarks/benchmark_xof.py). 
diff --git a/benchmarks/benchmark_xof.py b/benchmarks/benchmark_xof.py index c87dd04..1f7c806 100644 --- a/benchmarks/benchmark_xof.py +++ b/benchmarks/benchmark_xof.py @@ -1,91 +1,157 @@ -import random -from hashlib import shake_128 +from timeit import timeit +from hashlib import shake_128, shake_256 from xoflib import Shake128, Shake256, TurboShake128, TurboShake256 -import time from Crypto.Hash.SHAKE128 import SHAKE128_XOF +from Crypto.Hash.SHAKE256 import SHAKE256_XOF +from shake_wrapper import shake_128_hashlib, shake_256_hashlib -random.seed(0) -t0 = time.time() -xof = Shake128(b"123").finalize() -for _ in range(10_000): - n = random.randint(1, 500) - a = xof.read(n) -print(f"10_000 calls (read(1, 500)) with xoflib: {time.time() - t0 }") - -random.seed(0) -t0 = time.time() -for _ in range(10_000): - n = random.randint(1, 500) - a = shake_128(b"123").digest(n) -print(f"10_000 calls (read(1, 500)) with hashlib: {time.time() - t0 }") - -random.seed(0) -t0 = time.time() -xof = SHAKE128_XOF() -xof.update(b"123") -for _ in range(10_000): - n = random.randint(1, 500) - a = xof.read(n) -print(f"10_000 calls (read(1, 500)) with pycryptodome: {time.time() - t0 }") - -print("-" * 80) - -t0 = time.time() -xof = Shake128(b"123").finalize() -for _ in range(1_000_000): - a = xof.read(1) -print(f"1_000_000 single byte reads with xoflib: {time.time() - t0 }") - -t0 = time.time() -xof = SHAKE128_XOF() -xof.update(b"123") -for _ in range(1_000_000): - a = xof.read(1) -print(f"1_000_000 single byte reads pycryptodome: {time.time() - t0 }") - -t0 = time.time() -xof = Shake128(b"123").finalize() -for _ in range(1_000_000): - a = xof.read(168) -print(f"100_000 block reads with xoflib: {time.time() - t0 }") - -t0 = time.time() -xof = SHAKE128_XOF() -xof.update(b"123") -for _ in range(1_000_000): - a = xof.read(168) -print(f"100_000 block reads pycryptodome: {time.time() - t0 }") - -print("-" * 80) - -random.seed(0) -t0 = time.time() -xof = Shake128(b"123").finalize() -for _ in 
range(10_000): - n = random.randint(1, 5000) - a = xof.read(n) -print(f"10_000 calls (read(1, 5000)) with xoflib Shake128: {time.time() - t0 }") - -random.seed(0) -t0 = time.time() -xof = Shake256(b"123").finalize() -for _ in range(10_000): - n = random.randint(1, 5000) - a = xof.read(n) -print(f"10_000 calls (read(1, 5000)) with xoflib Shake256: {time.time() - t0 }") - -random.seed(0) -t0 = time.time() -xof = TurboShake128(1, b"123").finalize() -for _ in range(10_000): - n = random.randint(1, 5000) - a = xof.read(n) -print(f"10_000 calls (read(1, 5000)) with xoflib TurboShake128: {time.time() - t0 }") - -random.seed(0) -t0 = time.time() -xof = TurboShake256(1, b"123").finalize() -for _ in range(10_000): - n = random.randint(1, 5000) - a = xof.read(n) -print(f"10_000 calls (read(1, 5000)) with xoflib TurboShake256: {time.time() - t0 }") + +def xor_bytes(a, b): + return bytes(i ^ j for i, j in zip(a, b)) + + +def benchmark_xoflib_stream(shake, absorb, c, n): + xof = shake(absorb).finalize() + res = bytes([0] * c) + for _ in range(n): + chunk = xof.read(c) + res = xor_bytes(res, chunk) + return res + + +def benchmark_xoflib_turbo_stream(turboshake, absorb, c, n): + xof = turboshake(1, absorb).finalize() + res = bytes([0] * c) + for _ in range(n): + chunk = xof.read(c) + res = xor_bytes(res, chunk) + return res + + +def benchmark_hashlib_one_call(shake, absorb, c, n): + """ + Requires generating all c * n bytes in one go + """ + xof = shake(absorb).digest(c * n) + xof_chunks = [xof[i : i + c] for i in range(0, c * n, c)] + assert len(xof_chunks) == n + + res = bytes([0] * c) + for chunk in xof_chunks: + res = xor_bytes(res, chunk) + return res + + +def benchmark_hashlib_stream(shake, absorb, c, n): + """ + Requests only the bytes needed, but requires n calls to the digest + """ + res = bytes([0] * c) + xof = shake(absorb) + for _ in range(n): + chunk = xof.read(c) + res = xor_bytes(res, chunk) + return res + + +def benchmark_pycryptodome_stream(shake, absorb, c, n): + 
shake.__init__() + xof = shake.update(absorb) + res = bytes([0] * c) + for _ in range(n): + chunk = xof.read(c) + res = xor_bytes(res, chunk) + return res + + +# Ensure things work +a = benchmark_xoflib_stream(Shake128, b"benchmarking...", 123, 1000) +b = benchmark_hashlib_one_call(shake_128, b"benchmarking...", 123, 1000) +c = benchmark_hashlib_stream(shake_128_hashlib, b"benchmarking...", 123, 1000) +d = benchmark_pycryptodome_stream(SHAKE128_XOF(), b"benchmarking...", 123, 1000) +assert a == b == c == d + +benchmark_data = [ + (1, 10_000, 100), + (100, 10_000, 100), + (1000, 1000, 100), + (10_000, 1000, 10), + (32, 100_000, 10), +] + +for name, shakes in [ + ("Shake128: ", (Shake128, shake_128, shake_128_hashlib, SHAKE128_XOF())), + ("Shake256: ", (Shake256, shake_256, shake_256_hashlib, SHAKE256_XOF())), +]: + print("=" * 80) + print(f"Benchmarking {name}") + print("=" * 80) + for c, n, number in benchmark_data: + print(f"Requesting {c} bytes from XOF {n} times") + xoflib_time = timeit( + 'benchmark_xoflib_stream(shake, b"benchmarking...", c, n)', + globals={ + "shake": shakes[0], + "benchmark_xoflib_stream": benchmark_xoflib_stream, + "c": c, + "n": n, + }, + number=number, + ) + print(f"xoflib: {xoflib_time:.2f}s") + + hashlib_single_time = timeit( + 'benchmark_hashlib_one_call(shake, b"benchmarking...", c, n)', + globals={ + "shake": shakes[1], + "benchmark_hashlib_one_call": benchmark_hashlib_one_call, + "c": c, + "n": n, + }, + number=number, + ) + print(f"hashlib (single call): {hashlib_single_time:.2f}s") + + hashlib_stream_time = timeit( + 'benchmark_hashlib_stream(shake, b"benchmarking...", c, n)', + globals={ + "shake": shakes[2], + "benchmark_hashlib_stream": benchmark_hashlib_stream, + "c": c, + "n": n, + }, + number=number, + ) + print(f"hashlib (streaming): {hashlib_stream_time:.2f}s") + + pycryptodome_time = timeit( + 'benchmark_pycryptodome_stream(shake, b"benchmarking...", c, n)', + globals={ + "shake": shakes[3], + 
"benchmark_pycryptodome_stream": benchmark_pycryptodome_stream, + "c": c, + "n": n, + }, + number=number, + ) + print(f"pycryptodome: {pycryptodome_time:.2f}s") + print() + +for name, shake in [("TurboShake128", TurboShake128), ("TurboShake256", TurboShake256)]: + print("=" * 80) + print(f"Benchmarking {name}") + print("=" * 80) + for c, n, number in benchmark_data: + print(f"Requesting {c} bytes from XOF {n} times") + xoflib_time = timeit( + 'benchmark_xoflib_turbo_stream(shake, b"benchmarking...", c, n)', + globals={ + "shake": shake, + "benchmark_xoflib_turbo_stream": benchmark_xoflib_turbo_stream, + "c": c, + "n": n, + }, + number=number, + ) + print(f"xoflib: {xoflib_time:.2f}s") + print() diff --git a/benchmarks/shake_wrapper.py b/benchmarks/shake_wrapper.py new file mode 100644 index 0000000..f8232c1 --- /dev/null +++ b/benchmarks/shake_wrapper.py @@ -0,0 +1,28 @@ +from hashlib import shake_128, shake_256 + +class ShakeStream: + """ + Written by David Buchanan + + Taken from: + https://github.com/pyca/cryptography/issues/9185#issuecomment-1868518432 + """ + def __init__(self, digestfn) -> None: + # digestfn is anything we can call repeatedly with different lengths + self.digest = digestfn + self.buf = self.digest(32) # arbitrary starting length + self.offset = 0 + + def read(self, n: int) -> bytes: + # double the buffer size until we have enough + while self.offset + n > len(self.buf): + self.buf = self.digest(len(self.buf) * 2) + res = self.buf[self.offset:self.offset + n] + self.offset += n + return res + +def shake_128_hashlib(absorb): + return ShakeStream(shake_128(absorb).digest) + +def shake_256_hashlib(absorb): + return ShakeStream(shake_256(absorb).digest)