Skip to content

Commit

Permalink
Merge pull request #75 from GiacomoPope/speed_up_decoding
Browse files Browse the repository at this point in the history
speed up decoding
  • Loading branch information
GiacomoPope authored Jul 24, 2024
2 parents 91bbd21 + 80c3415 commit 44e8761
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 21 deletions.
20 changes: 10 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,11 @@ The above example would also work with `ML_KEM_768` and `ML_KEM_1024`.

#### Benchmarks

| Params | keygen | keygen/s | encap | encap/s | decap | decap/s |
|------------|---------:|-----------:|--------:|----------:|--------:|---------:|
|ML-KEM-512 | 3.87ms| 258.47| 6.59ms| 151.79| 10.97ms| 91.15 |
|ML-KEM-768 | 5.85ms| 170.84| 9.67ms| 103.43| 15.83ms| 63.15 |
|ML-KEM-1024 | 8.52ms| 117.38| 13.31ms| 75.12| 21.58ms| 46.34 |
| Params | keygen | keygen/s | encap | encap/s | decap | decap/s |
|------------|---------:|-----------:|--------:|----------:|--------:|--------:|
| ML-KEM-512 | 3.84ms | 260.47 | 4.99ms | 200.44 | 6.40ms | 156.15 |
| ML-KEM-768 | 5.67ms | 176.26 | 7.15ms | 139.84 | 8.99ms | 111.27 |
| ML-KEM-1024| 8.32ms | 120.15 | 10.10ms | 99.02 | 12.40ms | 80.66 |

All times recorded using an Intel Core i7-9750H CPU and averaged over 1000 runs.

Expand Down Expand Up @@ -143,11 +143,11 @@ currently only support $q = 3329$ and $n = 256$.

#### Benchmarks

| Params | keygen | keygen/s | encap | encap/s | decap | decap/s |
|------------|---------:|-----------:|--------:|----------:|--------:|---------:|
|Kyber512 | 3.97ms| 252.17| 6.11ms| 163.70| 10.55ms| 94.80 |
|Kyber768 | 5.94ms| 168.49| 8.88ms| 112.64| 15.10ms| 66.21 |
|Kyber1024 | 8.52ms| 117.30| 12.17ms| 82.14| 20.48ms| 48.83 |
| Params | keygen | keygen/s | encap | encap/s | decap | decap/s |
|------------|---------:|-----------:|--------:|----------:|--------:|--------:|
| Kyber512 | 3.86ms | 258.85 | 4.43ms | 225.78 | 5.82ms | 171.72 |
| Kyber768 | 5.75ms | 173.96 | 6.38ms | 156.68 | 8.20ms | 121.93 |
| Kyber1024 | 8.26ms | 121.01 | 8.88ms | 112.60 | 11.15ms | 89.71 |

All times recorded using an Intel Core i7-9750H CPU and averaged over 1000 runs.

Expand Down
6 changes: 3 additions & 3 deletions benchmarks/benchmark_kyber.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ def benchmark_kyber(Kyber, name, count):
avg_dec = sum(dec_times) / count
print(
f" {name:11} |"
f"{avg_keygen*1000:8.2f}ms {1/avg_keygen:11.2f}"
f"{avg_enc*1000:8.2f}ms {1/avg_enc:10.2f}"
f"{avg_dec*1000:8.2f}ms {1/avg_dec:8.2f}"
f"{avg_keygen*1000:7.2f}ms | {1/avg_keygen:10.2f} |"
f"{avg_enc*1000:6.2f}ms | {1/avg_enc:9.2f} |"
f"{avg_dec*1000:6.2f}ms | {1/avg_dec:7.2f} |"
)


Expand Down
6 changes: 3 additions & 3 deletions benchmarks/benchmark_ml_kem.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ def benchmark_ml_kem(ML_KEM, name, count):
avg_dec = sum(dec_times) / count
print(
f" {name:11} |"
f"{avg_keygen*1000:8.2f}ms {1/avg_keygen:11.2f}"
f"{avg_enc*1000:8.2f}ms {1/avg_enc:10.2f}"
f"{avg_dec*1000:8.2f}ms {1/avg_dec:8.2f}"
f"{avg_keygen*1000:7.2f}ms | {1/avg_keygen:10.2f} |"
f"{avg_enc*1000:6.2f}ms | {1/avg_enc:9.2f} |"
f"{avg_dec*1000:6.2f}ms | {1/avg_dec:7.2f} |"
)


Expand Down
4 changes: 3 additions & 1 deletion src/kyber_py/ml_kem/ml_kem.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,9 @@ def _pke_encrypt(self, ek_pke, m, r):

# First check if the encap key has the right length
if len(ek_pke) != 384 * self.k + 32:
raise ValueError("Type check failed, ek_pke has the wrong length")
raise ValueError(
f"Type check failed, ek_pke has the wrong length, expected {384 * self.k + 32} bytes and received {len(ek_pke)} b"
)

# Unpack ek
t_hat_bytes, rho = ek_pke[:-32], ek_pke[-32:]
Expand Down
22 changes: 18 additions & 4 deletions src/kyber_py/polynomials/polynomials.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,25 @@ def decode(self, input_bytes, d, is_ntt=False):
else:
m = 2**d

# Parse the bits into coefficents of the polynomial
# Helper values
tmp, idx = 0, 0
bit_index = 0
mask = (1 << d) - 1
coeffs = [0 for _ in range(256)]
list_of_bits = bytes_to_bits(input_bytes)
for i in range(256):
coeffs[i] = sum(list_of_bits[i * d + j] << j for j in range(d)) % m

# Iterate through all bytes
for b in input_bytes:
tmp |= b << bit_index
bit_index += 8

while bit_index >= d:
# Set the coefficient
coeffs[idx] = (tmp & mask) % m

# Update helpers
bit_index -= d
tmp >>= d
idx += 1
return self(coeffs, is_ntt=is_ntt)

def __call__(self, coefficients, is_ntt=False):
Expand Down

0 comments on commit 44e8761

Please sign in to comment.