From e4d1444c957c7abeaf7ed4f551cc15508cad54d9 Mon Sep 17 00:00:00 2001
From: bhargav <bhargav.annem@gmail.com>
Date: Tue, 2 Jan 2024 20:48:55 -0600
Subject: [PATCH] chore: bench concrete

---
 BENCHMARK_CONCRETE.md | 42 +++++++++++++++++++++++++++++++++
 Cargo.lock            | 32 +++++++++++++++++++++++++
 Cargo.toml            |  3 ++-
 benches/concrete.rs   | 54 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 130 insertions(+), 1 deletion(-)
 create mode 100644 BENCHMARK_CONCRETE.md
 create mode 100644 benches/concrete.rs

diff --git a/BENCHMARK_CONCRETE.md b/BENCHMARK_CONCRETE.md
new file mode 100644
index 0000000..290a5ad
--- /dev/null
+++ b/BENCHMARK_CONCRETE.md
@@ -0,0 +1,42 @@
+# Benchmarks
+
+## Table of Contents
+
+- [Overview](#overview)
+- [Benchmark Results](#benchmark-results)
+    - [Polynomial Multiplication Benchmarks](#polynomial-multiplication-benchmarks)
+
+## Overview
+
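+This benchmark comparison report shows the difference in performance between this crate's NTT-based polynomial
+multiplication (`NTT-Based`) and the forward NTT of the `concrete-ntt` crate (`Concrete-NTT`). Each row is labelled with the input size n: the `NTT-Based` column times a full multiplication of two polynomials with n coefficients each, while the `Concrete-NTT` column times a single forward NTT of size n, so the two columns do not measure the same operation.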
+
+Computer Stats:
+
+```
+CPU(s):                          16
+Thread(s) per core:              2
+Core(s) per socket:              8
+Socket(s):                       1
+```
+
+## Benchmark Results
+
+### Polynomial Multiplication Benchmarks
+
+|             | `NTT-Based`               | `Concrete-NTT`                       |
+|:------------|:--------------------------|:------------------------------------ |
+| **`64`**    | `982.22 us` (✅ **1.00x**) | `84.31 ns` (🚀 **11650.05x faster**)  |
+| **`128`**   | `1.18 ms` (✅ **1.00x**)   | `149.42 ns` (🚀 **7901.64x faster**)  |
+| **`256`**   | `2.03 ms` (✅ **1.00x**)   | `286.35 ns` (🚀 **7091.84x faster**)  |
+| **`512`**   | `2.75 ms` (✅ **1.00x**)   | `600.13 ns` (🚀 **4580.51x faster**)  |
+| **`1024`**  | `4.99 ms` (✅ **1.00x**)   | `1.32 us` (🚀 **3779.10x faster**)    |
+| **`2048`**  | `9.42 ms` (✅ **1.00x**)   | `2.74 us` (🚀 **3434.03x faster**)    |
+| **`4096`**  | `18.04 ms` (✅ **1.00x**)  | `5.84 us` (🚀 **3089.55x faster**)    |
+| **`8192`**  | `35.30 ms` (✅ **1.00x**)  | `12.27 us` (🚀 **2877.03x faster**)   |
+| **`16384`** | `72.15 ms` (✅ **1.00x**)  | `25.50 us` (🚀 **2829.18x faster**)   |
+| **`32768`** | `155.51 ms` (✅ **1.00x**) | `53.88 us` (🚀 **2886.10x faster**)   |
+
+---
+Made with [criterion-table](https://github.com/nu11ptr/criterion-table)
+
diff --git a/Cargo.lock b/Cargo.lock
index b9e92c9..07a68f1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -11,6 +11,12 @@ dependencies = [
  "memchr",
 ]
 
+[[package]]
+name = "aligned-vec"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4aa90d7ce82d4be67b64039a3d588d38dbcc6736577de4a847025ce5b0c468d1"
+
 [[package]]
 name = "anes"
 version = "0.1.6"
@@ -41,6 +47,12 @@ version = "3.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec"
 
+[[package]]
+name = "bytemuck"
+version = "1.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6"
+
 [[package]]
 name = "cast"
 version = "0.3.0"
@@ -105,6 +117,16 @@ version = "0.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"
 
+[[package]]
+name = "concrete-ntt"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1167c583765f273205c691a0a8bff23a925aa5bc66f2448031e4a6e518cd64d7"
+dependencies = [
+ "aligned-vec",
+ "pulp",
+]
+
 [[package]]
 name = "criterion"
 version = "0.5.1"
@@ -204,6 +226,7 @@ dependencies = [
 name = "fast-ntt"
 version = "0.1.0"
 dependencies = [
+ "concrete-ntt",
  "criterion",
  "crypto-bigint",
  "hex",
@@ -447,6 +470,15 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "pulp"
+version = "0.11.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "866e8018d6397b0717100dd4a7948fc8cbc8c4b8ce3e39e98a0e1e878d3ba925"
+dependencies = [
+ "bytemuck",
+]
+
 [[package]]
 name = "quote"
 version = "1.0.33"
diff --git a/Cargo.toml b/Cargo.toml
index 2423a14..2e3ba9f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,6 +9,7 @@ categories = ["cryptography", "data-structures"]
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
+concrete-ntt = "0.1.1"
 criterion = "0.5.1"
 crypto-bigint = "0.5.3"
 hex = "0.4.3"
@@ -21,7 +22,7 @@ rayon = "1.8.0"
 criterion = { version = "0.5.1", features = ["html_reports"] }
 
 [[bench]]
-name = "benchmark"
+name = "concrete"
 harness = false
 
 [features]
diff --git a/benches/concrete.rs b/benches/concrete.rs
new file mode 100644
index 0000000..db3abf1
--- /dev/null
+++ b/benches/concrete.rs
@@ -0,0 +1,54 @@
+use concrete_ntt::prime32::Plan;
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+use fast_ntt::{
+    ntt::{forward, working_modulus, Constants},
+    numbers::BigInt,
+    polynomial::{fast_mul, mul_brute, Polynomial, PolynomialFieldElement, PolynomialTrait},
+};
+use itertools::Itertools;
+
+const deg: usize = 16; // exclusive upper bound of the size exponent: the bench loop runs `6..deg`
+
+/// Builds two all-ones polynomials with `x` coefficients and multiplies them via `fast_mul` with `c`.
+fn bench_mul<T: PolynomialFieldElement>(x: usize, c: &Constants<T>) {
+    let one = T::from(1);
+    let (a, b) = (Polynomial::new(vec![one; x]), Polynomial::new(vec![one; x]));
+    let _ = fast_mul(a, b, c); // result is discarded; only the work matters to the caller
+}
+
+/// Allocates a length-`x` buffer of ones and applies `plan.fwd` to it through a mutable borrow.
+fn bench_concrete<T: PolynomialFieldElement>(x: usize, plan: &Plan) {
+    // `T` is not used in the body; the caller still names it with a turbofish.
+    let mut buffer = vec![1; x];
+    plan.fwd(&mut buffer);
+}
+
+/// Registers the "NTT-Based" and "Concrete-NTT" benchmarks for every size `1 << n`, `n` in `6..deg`.
+fn criterion_benchmark(c: &mut Criterion) {
+    let mut group = c.benchmark_group("Polynomial Multiplication Benchmarks");
+    for n in 6..deg {
+        let size = 1usize << n;
+        // NOTE(review): `N` is derived from the exponent `n`, not from `size` -- confirm intent.
+        let N = BigInt::from((2 * n).next_power_of_two());
+        // `+` binds tighter than `<<`: the former `N << 1 + 1` always meant `N << 2`. Made explicit.
+        let M = N << (1 + 1);
+        let c = working_modulus(N, M);
+        group.bench_with_input(BenchmarkId::new("NTT-Based", size), &n, |b, n| {
+            b.iter(|| bench_mul(black_box(1 << n), black_box(&c)))
+        });
+        // One plan per size, built outside the timed closure; `unwrap` panics if creation fails.
+        let plan = Plan::try_new(size, 1062862849).unwrap();
+        group.bench_with_input(BenchmarkId::new("Concrete-NTT", size), &n, |b, n| {
+            b.iter(|| bench_concrete::<BigInt>(black_box(1 << n), black_box(&plan)))
+        });
+    }
+    // Finish the group explicitly (recommended by Criterion) instead of relying on drop.
+    group.finish();
+}
+
+criterion_group! {
+  name = benches;
+  config = Criterion::default().sample_size(10); // collect 10 samples per benchmark
+  targets =  criterion_benchmark
+}
+criterion_main!(benches); // expands to the bench binary's `main` (Cargo.toml sets `harness = false`)