From b071b90aec0d2ac2ce323b458e3944b3a69bab41 Mon Sep 17 00:00:00 2001 From: Jonas Schneider-Bensch Date: Wed, 13 Nov 2024 16:39:50 +0100 Subject: [PATCH 1/5] More efficient butterfly in inverse NTT layers 0-2 --- libcrux-intrinsics/src/avx2.rs | 4 +- libcrux-ml-dsa/src/ntt.rs | 240 ++++++++++++++-------------- libcrux-ml-dsa/src/simd/avx2.rs | 69 ++++++-- libcrux-ml-dsa/src/simd/avx2/ntt.rs | 138 +++++++++------- libcrux-ml-dsa/src/simd/portable.rs | 49 ++++-- libcrux-ml-dsa/src/simd/traits.rs | 29 +++- 6 files changed, 325 insertions(+), 204 deletions(-) diff --git a/libcrux-intrinsics/src/avx2.rs b/libcrux-intrinsics/src/avx2.rs index 0ee9467e8..da3dacfaf 100644 --- a/libcrux-intrinsics/src/avx2.rs +++ b/libcrux-intrinsics/src/avx2.rs @@ -549,8 +549,8 @@ pub fn mm256_set_epi64x(input3: i64, input2: i64, input1: i64, input0: i64) -> V } #[inline(always)] -pub fn mm256_unpacklo_epi64(a: Vec256, b: Vec256) -> Vec256 { - unsafe { _mm256_unpacklo_epi64(a, b) } +pub fn mm256_unpacklo_epi64(lhs: Vec256, rhs: Vec256) -> Vec256 { + unsafe { _mm256_unpacklo_epi64(lhs, rhs) } } #[inline(always)] diff --git a/libcrux-ml-dsa/src/ntt.rs b/libcrux-ml-dsa/src/ntt.rs index eb8f2424d..300de68ef 100644 --- a/libcrux-ml-dsa/src/ntt.rs +++ b/libcrux-ml-dsa/src/ntt.rs @@ -19,59 +19,77 @@ fn invert_ntt_at_layer_0(re: &mut PolynomialRingElement( re: &mut PolynomialRingElement, index: usize, - zeta_0: i32, - zeta_1: i32, - zeta_2: i32, - zeta_3: i32, + zeta00: i32, + zeta01: i32, + zeta02: i32, + zeta03: i32, + zeta10: i32, + zeta11: i32, + zeta12: i32, + zeta13: i32, ) { - re.simd_units[index] = - SIMDUnit::invert_ntt_at_layer_0(re.simd_units[index], zeta_0, zeta_1, zeta_2, zeta_3); + (re.simd_units[index], re.simd_units[index + 1]) = SIMDUnit::invert_ntt_at_layer_0( + re.simd_units[index], + re.simd_units[index + 1], + zeta00, + zeta01, + zeta02, + zeta03, + zeta10, + zeta11, + zeta12, + zeta13, + ); } - // macro_rules! round { - // ($i:literal, $zeta_0:literal, $zeta_1:literal, $zeta_2:literal, $zeta_3:literal) => { - // re.simd_units[$i] = SIMDUnit::invert_ntt_at_layer_0( - // re.simd_units[$i], - // $zeta_0, - // $zeta_1, - // $zeta_2, - // $zeta_3, - // ); - // }; - // } - - round(re, 0, 1976782, -846154, 1400424, 3937738); - round(re, 1, -1362209, -48306, 3919660, -554416); - round(re, 2, -3545687, 1612842, -976891, 183443); - round(re, 3, -2286327, -420899, -2235985, -2939036); - round(re, 4, -3833893, -260646, -1104333, -1667432); - round(re, 5, 1910376, -1803090, 1723600, -426683); - round(re, 6, 472078, 1717735, -975884, 2213111); - round(re, 7, 269760, 3866901, 3523897, -3038916); - round(re, 8, -1799107, -3694233, 1652634, 810149); - round(re, 9, 3014001, 1616392, 162844, -3183426); - round(re, 10, -1207385, 185531, 3369112, 1957272); - round(re, 11, -164721, 2454455, 2432395, -2013608); - round(re, 12, -3776993, 594136, -3724270, -2584293); - round(re, 13, -1846953, -1671176, -2831860, -542412); - round(re, 14, 3406031, 2235880, 777191, 1500165); - round(re, 15, -1374803, -2546312, 1917081, -1279661); - round(re, 16, -1962642, 3306115, 1312455, -451100); - round(re, 17, -1430225, -3318210, 1237275, -1333058); - round(re, 18, -1050970, 1903435, 1869119, -2994039); - round(re, 19, -3548272, 2635921, 1250494, -3767016); - round(re, 20, 1595974, 2486353, 1247620, 4055324); - round(re, 21, 1265009, -2590150, 2691481, 2842341); - round(re, 22, 203044, 1735879, -3342277, 3437287); - round(re, 23, 4108315, -2437823, 286988, 342297); - round(re, 24, -3595838, -768622, -525098, -3556995); - round(re, 25, 3207046, 2031748, -3122442, -655327); - round(re, 26, -522500, -43260, -1613174, 495491); - round(re, 27, 819034, 909542, 1859098, 900702); - round(re, 28, -3193378, -1197226, -3759364, -3520352); - round(re, 29, 3513181, -1235728, 2434439, 266997); - round(re, 30, -3562462, -2446433, 2244091, -3342478); - round(re, 31, 3817976, 2316500, 3407706, 2091667); + round( + re, 0, 1976782, -846154, 1400424, 3937738, -1362209, -48306, 3919660, -554416, + ); + round( + re, 2, -3545687, 1612842, -976891, 183443, -2286327, -420899, -2235985, -2939036, + ); + round( + re, 4, -3833893, -260646, -1104333, -1667432, 1910376, -1803090, 1723600, -426683, + ); + round( + re, 6, 472078, 1717735, -975884, 2213111, 269760, 3866901, 3523897, -3038916, + ); + round( + re, 8, -1799107, -3694233, 1652634, 810149, 3014001, 1616392, 162844, -3183426, + ); + round( + re, 10, -1207385, 185531, 3369112, 1957272, -164721, 2454455, 2432395, -2013608, + ); + round( + re, 12, -3776993, 594136, -3724270, -2584293, -1846953, -1671176, -2831860, -542412, + ); + round( + re, 14, 3406031, 2235880, 777191, 1500165, -1374803, -2546312, 1917081, -1279661, + ); + round( + re, 16, -1962642, 3306115, 1312455, -451100, -1430225, -3318210, 1237275, -1333058, + ); + round( + re, 18, -1050970, 1903435, 1869119, -2994039, -3548272, 2635921, 1250494, -3767016, + ); + round( + re, 20, 1595974, 2486353, 1247620, 4055324, 1265009, -2590150, 2691481, 2842341, + ); + round( + re, 22, 203044, 1735879, -3342277, 3437287, 4108315, -2437823, 286988, 342297, + ); + round( + re, 24, -3595838, -768622, -525098, -3556995, 3207046, 2031748, -3122442, -655327, + ); + round( + re, 26, -522500, -43260, -1613174, 495491, 819034, 909542, 1859098, 900702, + ); + round( + re, 28, -3193378, -1197226, -3759364, -3520352, 3513181, -1235728, 2434439, 266997, + ); + round( + re, 30, -3562462, -2446433, 2244091, -3342478, 3817976, 2316500, 3407706, 2091667, + ); } #[inline(always)] @@ -80,45 +98,37 @@ fn invert_ntt_at_layer_1(re: &mut PolynomialRingElement( re: &mut PolynomialRingElement, index: usize, - zeta_0: i32, - zeta_1: i32, + zeta_00: i32, + zeta_01: i32, + zeta_10: i32, + zeta_11: i32, ) { - re.simd_units[index] = - SIMDUnit::invert_ntt_at_layer_1(re.simd_units[index], zeta_0, zeta_1); + (re.simd_units[index], re.simd_units[index + 1]) = SIMDUnit::invert_ntt_at_layer_1( + re.simd_units[index], + re.simd_units[index + 1], + zeta_00, + zeta_01, + zeta_10, + zeta_11, + ); } - round(re, 0, 3839961, -3628969); - round(re, 1, -3881060, -3019102); - round(re, 2, -1439742, -812732); - round(re, 3, -1584928, 1285669); - round(re, 4, 1341330, 1315589); - round(re, 5, -177440, -2409325); - round(re, 6, -1851402, 3159746); - round(re, 7, -3553272, 189548); - round(re, 8, -1316856, 759969); - round(re, 9, -210977, 2389356); - round(re, 10, -3249728, 1653064); - round(re, 11, -8578, -3724342); - round(re, 12, 3958618, 904516); - round(re, 13, -1100098, 44288); - round(re, 14, 3097992, 508951); - round(re, 15, 264944, -3343383); - round(re, 16, -1430430, 1852771); - round(re, 17, 1349076, -381987); - round(re, 18, -1308169, -22981); - round(re, 19, -1228525, -671102); - round(re, 20, -2477047, -411027); - round(re, 21, -3693493, -2967645); - round(re, 22, 2715295, 2147896); - round(re, 23, -983419, 3412210); - round(re, 24, 126922, -3632928); - round(re, 25, -3157330, -3190144); - round(re, 26, -1000202, -4083598); - round(re, 27, 1939314, -1257611); - round(re, 28, -1585221, 2176455); - round(re, 29, 3475950, -1452451); - round(re, 30, -3041255, -3677745); - round(re, 31, -1528703, -3930395); + round(re, 0, 3839961, -3628969, -3881060, -3019102); + round(re, 2, -1439742, -812732, -1584928, 1285669); + round(re, 4, 1341330, 1315589, -177440, -2409325); + round(re, 6, -1851402, 3159746, -3553272, 189548); + round(re, 8, -1316856, 759969, -210977, 2389356); + round(re, 10, -3249728, 1653064, -8578, -3724342); + round(re, 12, 3958618, 904516, -1100098, 44288); + round(re, 14, 3097992, 508951, 264944, -3343383); + round(re, 16, -1430430, 1852771, 1349076, -381987); + round(re, 18, -1308169, -22981, -1228525, -671102); + round(re, 20, -2477047, -411027, -3693493, -2967645); + round(re, 22, 2715295, 2147896, -983419, 3412210); + round(re, 24, 126922, -3632928, -3157330, -3190144); + round(re, 26, -1000202, -4083598, 1939314, -1257611); + round(re, 28, -1585221, 2176455, 3475950, -1452451); + round(re, 30, -3041255, -3677745, -1528703, -3930395); } #[inline(always)] @@ -126,43 +136,33 @@ fn invert_ntt_at_layer_2(re: &mut PolynomialRingElement( re: &mut PolynomialRingElement, index: usize, - zeta: i32, + zeta1: i32, + zeta2: i32, ) { - re.simd_units[index] = SIMDUnit::invert_ntt_at_layer_2(re.simd_units[index], zeta); + (re.simd_units[index], re.simd_units[index + 1]) = SIMDUnit::invert_ntt_at_layer_2( + re.simd_units[index], + re.simd_units[index + 1], + zeta1, + zeta2, + ); } - round(re, 0, -2797779); - round(re, 1, 2071892); - round(re, 2, -2556880); - round(re, 3, 3900724); - round(re, 4, 3881043); - round(re, 5, 954230); - round(re, 6, 531354); - round(re, 7, 811944); - round(re, 8, 3699596); - round(re, 9, -1600420); - round(re, 10, -2140649); - round(re, 11, 3507263); - round(re, 12, -3821735); - round(re, 13, 3505694); - round(re, 14, -1643818); - round(re, 15, -1699267); - round(re, 16, -539299); - round(re, 17, 2348700); - round(re, 18, -300467); - round(re, 19, 3539968); - round(re, 20, -2867647); - round(re, 21, 3574422); - round(re, 22, -3043716); - round(re, 23, -3861115); - round(re, 24, 3915439); - round(re, 25, -2537516); - round(re, 26, -3592148); - round(re, 27, -1661693); - round(re, 28, 3530437); - round(re, 29, 3077325); - round(re, 30, 95776); - round(re, 31, 2706023); + round(re, 0, -2797779, 2071892); + round(re, 2, -2556880, 3900724); + round(re, 4, 3881043, 954230); + round(re, 6, 531354, 811944); + round(re, 8, 3699596, -1600420); + round(re, 10, -2140649, 3507263); + round(re, 12, -3821735, 3505694); + round(re, 14, -1643818, -1699267); + round(re, 16, -539299, 2348700); + round(re, 18, -300467, 3539968); + round(re, 20, -2867647, 3574422); + round(re, 22, -3043716, -3861115); + round(re, 24, 3915439, -2537516); + round(re, 26, -3592148, -1661693); + round(re, 28, 3530437, 3077325); + round(re, 30, 95776, 2706023); } #[inline(always)] diff --git a/libcrux-ml-dsa/src/simd/avx2.rs b/libcrux-ml-dsa/src/simd/avx2.rs index 608f37add..6c1c9f1c7 100644 --- a/libcrux-ml-dsa/src/simd/avx2.rs +++ b/libcrux-ml-dsa/src/simd/avx2.rs @@ -138,26 +138,73 @@ impl Operations for AVX2SIMDUnit { #[inline(always)] #[allow(unsafe_code)] fn invert_ntt_at_layer_0( - simd_unit: Self, - zeta0: i32, - zeta1: i32, - zeta2: i32, - zeta3: i32, - ) -> Self { + simd_unit0: Self, + simd_unit1: Self, + zeta00: i32, + zeta01: i32, + zeta02: i32, + zeta03: i32, + zeta10: i32, + zeta11: i32, + zeta12: i32, + zeta13: i32, + ) -> (Self, Self) { unsafe { - ntt::invert_ntt_at_layer_0(simd_unit.coefficients, zeta0, zeta1, zeta2, zeta3).into() + let (a, b) = ntt::invert_ntt_at_layer_0( + simd_unit0.coefficients, + simd_unit1.coefficients, + zeta00, + zeta01, + zeta02, + zeta03, + zeta10, + zeta11, + zeta12, + zeta13, + ); + (a.into(), b.into()) } } #[inline(always)] #[allow(unsafe_code)] - fn invert_ntt_at_layer_1(simd_unit: Self, zeta0: i32, zeta1: i32) -> Self { - unsafe { ntt::invert_ntt_at_layer_1(simd_unit.coefficients, zeta0, zeta1).into() } + fn invert_ntt_at_layer_1( + simd_unit0: Self, + simd_unit1: Self, + zeta00: i32, + zeta01: i32, + zeta10: i32, + zeta11: i32, + ) -> (Self, Self) { + unsafe { + let (a, b) = ntt::invert_ntt_at_layer_1( + simd_unit0.coefficients, + simd_unit1.coefficients, + zeta00, + zeta01, + zeta10, + zeta11, + ); + (a.into(), b.into()) + } } #[inline(always)] #[allow(unsafe_code)] - fn invert_ntt_at_layer_2(simd_unit: Self, zeta: i32) -> Self { - unsafe { ntt::invert_ntt_at_layer_2(simd_unit.coefficients, zeta).into() } + fn invert_ntt_at_layer_2( + simd_unit0: Self, + simd_unit1: Self, + zeta0: i32, + zeta1: i32, + ) -> (Self, Self) { + unsafe { + let (a, b) = ntt::invert_ntt_at_layer_2( + simd_unit0.coefficients, + simd_unit1.coefficients, + zeta0, + zeta1, + ); + (a.into(), b.into()) + } } } diff --git a/libcrux-ml-dsa/src/simd/avx2/ntt.rs b/libcrux-ml-dsa/src/simd/avx2/ntt.rs index cb84a2933..9f167b7f3 100644 --- a/libcrux-ml-dsa/src/simd/avx2/ntt.rs +++ b/libcrux-ml-dsa/src/simd/avx2/ntt.rs @@ -100,28 +100,6 @@ fn butterfly_8(a: Vec256, b: Vec256, zeta0: i32, zeta1: i32) -> (Vec256, Vec256) (a_out, b_out) } -#[cfg_attr(not(hax), target_feature(enable = "avx2"))] -#[allow(unsafe_code)] -pub(super) unsafe fn invert_ntt_at_layer_0( - simd_unit: Vec256, - zeta0: i32, - zeta1: i32, - zeta2: i32, - zeta3: i32, -) -> Vec256 { - let zetas = mm256_set_epi32(zeta3, 0, zeta2, 0, zeta1, 0, zeta0, 0); - - let add_by_signs = mm256_set_epi32(-1, 1, -1, 1, -1, 1, -1, 1); - let add_by = mm256_shuffle_epi32::<0b10_11_00_01>(simd_unit); - let add_by = mm256_mullo_epi32(add_by, add_by_signs); - - let sums = mm256_add_epi32(simd_unit, add_by); - - let products = arithmetic::montgomery_multiply(sums, zetas); - - mm256_blend_epi32::<0b1_0_1_0_1_0_1_0>(sums, products) -} - #[cfg_attr(not(hax), target_feature(enable = "avx2"))] #[allow(unsafe_code)] unsafe fn ntt_at_layer_0(re: &mut [Vec256; SIMD_UNITS_IN_RING_ELEMENT]) { @@ -204,22 +182,6 @@ unsafe fn ntt_at_layer_0(re: &mut [Vec256; SIMD_UNITS_IN_RING_ELEMENT]) { ); } -#[cfg_attr(not(hax), target_feature(enable = "avx2"))] -#[allow(unsafe_code)] -pub(super) unsafe fn invert_ntt_at_layer_1(simd_unit: Vec256, zeta0: i32, zeta1: i32) -> Vec256 { - let zetas = mm256_set_epi32(zeta1, zeta1, 0, 0, zeta0, zeta0, 0, 0); - - let add_by_signs = mm256_set_epi32(-1, -1, 1, 1, -1, -1, 1, 1); - let add_by = mm256_shuffle_epi32::<0b01_00_11_10>(simd_unit); - let add_by = mm256_mullo_epi32(add_by, add_by_signs); - - let sums = mm256_add_epi32(simd_unit, add_by); - - let products = arithmetic::montgomery_multiply(sums, zetas); - - mm256_blend_epi32::<0b1_1_0_0_1_1_0_0>(sums, products) -} - #[cfg_attr(not(hax), target_feature(enable = "avx2"))] #[allow(unsafe_code)] unsafe fn ntt_at_layer_1(re: &mut [Vec256; SIMD_UNITS_IN_RING_ELEMENT]) { @@ -255,22 +217,6 @@ unsafe fn ntt_at_layer_1(re: &mut [Vec256; SIMD_UNITS_IN_RING_ELEMENT]) { round(re, 30, -3019102, -3881060, -3628969, 3839961); } -#[cfg_attr(not(hax), target_feature(enable = "avx2"))] -#[allow(unsafe_code)] -pub(super) unsafe fn invert_ntt_at_layer_2(simd_unit: Vec256, zeta: i32) -> Vec256 { - let zetas = mm256_set_epi32(zeta, zeta, zeta, zeta, 0, 0, 0, 0); - - let add_by_signs = mm256_set_epi32(-1, -1, -1, -1, 1, 1, 1, 1); - let add_by = mm256_permute4x64_epi64::<0b01_00_11_10>(simd_unit); - let add_by = mm256_mullo_epi32(add_by, add_by_signs); - - let sums = mm256_add_epi32(simd_unit, add_by); - - let products = arithmetic::montgomery_multiply(sums, zetas); - - mm256_blend_epi32::<0b1_1_1_1_0_0_0_0>(sums, products) -} - #[cfg_attr(not(hax), target_feature(enable = "avx2"))] #[allow(unsafe_code)] unsafe fn ntt_at_layer_2(re: &mut [Vec256; SIMD_UNITS_IN_RING_ELEMENT]) { @@ -503,3 +449,87 @@ pub(crate) fn ntt( re } + +#[cfg_attr(not(hax), target_feature(enable = "avx2"))] +#[allow(unsafe_code)] +pub(super) unsafe fn invert_ntt_at_layer_0( + simd_unit0: Vec256, + simd_unit1: Vec256, + zeta00: i32, + zeta01: i32, + zeta02: i32, + zeta03: i32, + zeta10: i32, + zeta11: i32, + zeta12: i32, + zeta13: i32, +) -> (Vec256, Vec256) { + const SHUFFLE: i32 = 0b11_01_10_00; + let a_shuffled = mm256_shuffle_epi32::(simd_unit0); + let b_shuffled = mm256_shuffle_epi32::(simd_unit1); + + let lo_values = mm256_unpacklo_epi64(a_shuffled, b_shuffled); + let hi_values = mm256_unpackhi_epi64(a_shuffled, b_shuffled); + + let sums = arithmetic::add(lo_values, hi_values); + let differences = arithmetic::subtract(hi_values, lo_values); + + let zetas = mm256_set_epi32(zeta13, zeta12, zeta03, zeta02,zeta11, zeta10, zeta01, zeta00); + let products = arithmetic::montgomery_multiply(differences, zetas); + + let a_shuffled = mm256_unpacklo_epi64(sums, products); + let b_shuffled = mm256_unpackhi_epi64(sums, products); + + let a = mm256_shuffle_epi32::(a_shuffled); + let b = mm256_shuffle_epi32::(b_shuffled); + + (a, b) +} + +#[cfg_attr(not(hax), target_feature(enable = "avx2"))] +#[allow(unsafe_code)] +pub(super) unsafe fn invert_ntt_at_layer_1( + simd_unit0: Vec256, + simd_unit1: Vec256, + zeta00: i32, + zeta01: i32, + zeta10: i32, + zeta11: i32, +) -> (Vec256, Vec256) { + let lo_values = mm256_unpacklo_epi64(simd_unit0, simd_unit1); + let hi_values = mm256_unpackhi_epi64(simd_unit0, simd_unit1); + + let sums = arithmetic::add(lo_values, hi_values); + let differences = arithmetic::subtract(hi_values, lo_values); + + let zetas = mm256_set_epi32(zeta11, zeta11, zeta01, zeta01,zeta10, zeta10, zeta00, zeta00); + let products = arithmetic::montgomery_multiply(differences, zetas); + + let a = mm256_unpacklo_epi64(sums, products); + let b = mm256_unpackhi_epi64(sums, products); + + (a, b) +} + +#[cfg_attr(not(hax), target_feature(enable = "avx2"))] +#[allow(unsafe_code)] +pub(super) unsafe fn invert_ntt_at_layer_2( + simd_unit0: Vec256, + simd_unit1: Vec256, + zeta0: i32, + zeta1: i32, +) -> (Vec256, Vec256) { + let lo_values = mm256_permute2x128_si256::<0x20>(simd_unit0, simd_unit1); + let hi_values = mm256_permute2x128_si256::<0x31>(simd_unit0, simd_unit1); + + let sums = arithmetic::add(lo_values, hi_values); + let differences = arithmetic::subtract(hi_values, lo_values); + + let zetas = mm256_set_epi32(zeta1, zeta1, zeta1, zeta1,zeta0, zeta0, zeta0, zeta0); + let products = arithmetic::montgomery_multiply(differences, zetas); + + let a = mm256_permute2x128_si256::<0x20>(sums, products); + let b = mm256_permute2x128_si256::<0x31>(sums, products); + + (a,b) +} diff --git a/libcrux-ml-dsa/src/simd/portable.rs b/libcrux-ml-dsa/src/simd/portable.rs index d45daf829..13ecf85dc 100644 --- a/libcrux-ml-dsa/src/simd/portable.rs +++ b/libcrux-ml-dsa/src/simd/portable.rs @@ -107,18 +107,45 @@ impl Operations for PortableSIMDUnit { } fn invert_ntt_at_layer_0( - simd_unit: Self, + simd_unit0: Self, + simd_unit1: Self, + zeta00: i32, + zeta01: i32, + zeta02: i32, + zeta03: i32, + zeta10: i32, + zeta11: i32, + zeta12: i32, + zeta13: i32, + ) -> (Self, Self) { + ( + ntt::invert_ntt_at_layer_0(simd_unit0, zeta00, zeta01, zeta02, zeta03), + ntt::invert_ntt_at_layer_0(simd_unit1, zeta10, zeta11, zeta12, zeta13), + ) + } + fn invert_ntt_at_layer_1( + simd_unit0: Self, + simd_unit1: Self, + zeta00: i32, + zeta01: i32, + zeta10: i32, + zeta11: i32, + ) -> (Self, Self) { + ( + ntt::invert_ntt_at_layer_1(simd_unit0, zeta00, zeta01), + ntt::invert_ntt_at_layer_1(simd_unit1, zeta10, zeta11), + ) + } + + fn invert_ntt_at_layer_2( + simd_unit0: Self, + simd_unit1: Self, zeta0: i32, zeta1: i32, - zeta2: i32, - zeta3: i32, - ) -> Self { - ntt::invert_ntt_at_layer_0(simd_unit, zeta0, zeta1, zeta2, zeta3) - } - fn invert_ntt_at_layer_1(simd_unit: Self, zeta0: i32, zeta1: i32) -> Self { - ntt::invert_ntt_at_layer_1(simd_unit, zeta0, zeta1) - } - fn invert_ntt_at_layer_2(simd_unit: Self, zeta: i32) -> Self { - ntt::invert_ntt_at_layer_2(simd_unit, zeta) + ) -> (Self, Self) { + ( + ntt::invert_ntt_at_layer_2(simd_unit0, zeta0), + ntt::invert_ntt_at_layer_2(simd_unit1, zeta1), + ) } } diff --git a/libcrux-ml-dsa/src/simd/traits.rs b/libcrux-ml-dsa/src/simd/traits.rs index 38715a115..a5cf98c00 100644 --- a/libcrux-ml-dsa/src/simd/traits.rs +++ b/libcrux-ml-dsa/src/simd/traits.rs @@ -77,14 +77,31 @@ pub(crate) trait Operations: Copy + Clone { // Inverse NTT fn invert_ntt_at_layer_0( - simd_unit: Self, + simd_unit0: Self, + simd_unit1: Self, + zeta00: i32, + zeta01: i32, + zeta02: i32, + zeta03: i32, + zeta10: i32, + zeta11: i32, + zeta12: i32, + zeta13: i32, + ) -> (Self, Self); + fn invert_ntt_at_layer_1( + simd_unit0: Self, + simd_unit1: Self, + zeta00: i32, + zeta01: i32, + zeta10: i32, + zeta11: i32, + ) -> (Self, Self); + fn invert_ntt_at_layer_2( + simd_unit0: Self, + simd_unit1: Self, zeta0: i32, zeta1: i32, - zeta2: i32, - zeta3: i32, - ) -> Self; - fn invert_ntt_at_layer_1(simd_unit: Self, zeta0: i32, zeta1: i32) -> Self; - fn invert_ntt_at_layer_2(simd_unit: Self, zeta: i32) -> Self; + ) -> (Self, Self); } // hax does not support trait with default implementations, so we use the From 8bdb4ff601c18d722251e95e01dfac38733b84b6 Mon Sep 17 00:00:00 2001 From: Jonas Schneider-Bensch Date: Thu, 14 Nov 2024 11:50:38 +0100 Subject: [PATCH 2/5] Push inverse NTT into the implementations --- libcrux-ml-dsa/src/arithmetic.rs | 5 - libcrux-ml-dsa/src/ntt.rs | 267 +------------- libcrux-ml-dsa/src/simd/avx2.rs | 82 +---- libcrux-ml-dsa/src/simd/avx2/invntt.rs | 346 ++++++++++++++++++ libcrux-ml-dsa/src/simd/avx2/ntt.rs | 84 ----- libcrux-ml-dsa/src/simd/portable.rs | 50 +-- .../src/simd/portable/arithmetic.rs | 7 +- libcrux-ml-dsa/src/simd/portable/invntt.rs | 315 ++++++++++++++++ libcrux-ml-dsa/src/simd/portable/ntt.rs | 79 +--- libcrux-ml-dsa/src/simd/traits.rs | 38 +- 10 files changed, 686 insertions(+), 587 deletions(-) create mode 100644 libcrux-ml-dsa/src/simd/avx2/invntt.rs create mode 100644 libcrux-ml-dsa/src/simd/portable/invntt.rs diff --git a/libcrux-ml-dsa/src/arithmetic.rs b/libcrux-ml-dsa/src/arithmetic.rs index 25db431db..ff91f65a7 100644 --- a/libcrux-ml-dsa/src/arithmetic.rs +++ b/libcrux-ml-dsa/src/arithmetic.rs @@ -20,11 +20,6 @@ pub(crate) fn vector_infinity_norm_exceeds( re: PolynomialRingElement, diff --git a/libcrux-ml-dsa/src/ntt.rs b/libcrux-ml-dsa/src/ntt.rs index 300de68ef..1ea58c883 100644 --- a/libcrux-ml-dsa/src/ntt.rs +++ b/libcrux-ml-dsa/src/ntt.rs @@ -1,8 +1,4 @@ -use crate::{ - arithmetic::FieldElementTimesMontgomeryR, - polynomial::PolynomialRingElement, - simd::traits::{montgomery_multiply_by_fer, Operations, COEFFICIENTS_IN_SIMD_UNIT}, -}; +use crate::{polynomial::PolynomialRingElement, simd::traits::Operations}; #[inline(always)] pub(crate) fn ntt( @@ -13,268 +9,13 @@ pub(crate) fn ntt( } } -#[inline(always)] -fn invert_ntt_at_layer_0(re: &mut PolynomialRingElement) { - #[inline(always)] - fn round( - re: &mut PolynomialRingElement, - index: usize, - zeta00: i32, - zeta01: i32, - zeta02: i32, - zeta03: i32, - zeta10: i32, - zeta11: i32, - zeta12: i32, - zeta13: i32, - ) { - (re.simd_units[index], re.simd_units[index + 1]) = SIMDUnit::invert_ntt_at_layer_0( - re.simd_units[index], - re.simd_units[index + 1], - zeta00, - zeta01, - zeta02, - zeta03, - zeta10, - zeta11, - zeta12, - zeta13, - ); - } - - round( - re, 0, 1976782, -846154, 1400424, 3937738, -1362209, -48306, 3919660, -554416, - ); - round( - re, 2, -3545687, 1612842, -976891, 183443, -2286327, -420899, -2235985, -2939036, - ); - round( - re, 4, -3833893, -260646, -1104333, -1667432, 1910376, -1803090, 1723600, -426683, - ); - round( - re, 6, 472078, 1717735, -975884, 2213111, 269760, 3866901, 3523897, -3038916, - ); - round( - re, 8, -1799107, -3694233, 1652634, 810149, 3014001, 1616392, 162844, -3183426, - ); - round( - re, 10, -1207385, 185531, 3369112, 1957272, -164721, 2454455, 2432395, -2013608, - ); - round( - re, 12, -3776993, 594136, -3724270, -2584293, -1846953, -1671176, -2831860, -542412, - ); - round( - re, 14, 3406031, 2235880, 777191, 1500165, -1374803, -2546312, 1917081, -1279661, - ); - round( - re, 16, -1962642, 3306115, 1312455, -451100, -1430225, -3318210, 1237275, -1333058, - ); - round( - re, 18, -1050970, 1903435, 1869119, -2994039, -3548272, 2635921, 1250494, -3767016, - ); - round( - re, 20, 1595974, 2486353, 1247620, 4055324, 1265009, -2590150, 2691481, 2842341, - ); - round( - re, 22, 203044, 1735879, -3342277, 3437287, 4108315, -2437823, 286988, 342297, - ); - round( - re, 24, -3595838, -768622, -525098, -3556995, 3207046, 2031748, -3122442, -655327, - ); - round( - re, 26, -522500, -43260, -1613174, 495491, 819034, 909542, 1859098, 900702, - ); - round( - re, 28, -3193378, -1197226, -3759364, -3520352, 3513181, -1235728, 2434439, 266997, - ); - round( - re, 30, -3562462, -2446433, 2244091, -3342478, 3817976, 2316500, 3407706, 2091667, - ); -} - -#[inline(always)] -fn invert_ntt_at_layer_1(re: &mut PolynomialRingElement) { - #[inline(always)] - fn round( - re: &mut PolynomialRingElement, - index: usize, - zeta_00: i32, - zeta_01: i32, - zeta_10: i32, - zeta_11: i32, - ) { - (re.simd_units[index], re.simd_units[index + 1]) = SIMDUnit::invert_ntt_at_layer_1( - re.simd_units[index], - re.simd_units[index + 1], - zeta_00, - zeta_01, - zeta_10, - zeta_11, - ); - } - - round(re, 0, 3839961, -3628969, -3881060, -3019102); - round(re, 2, -1439742, -812732, -1584928, 1285669); - round(re, 4, 1341330, 1315589, -177440, -2409325); - round(re, 6, -1851402, 3159746, -3553272, 189548); - round(re, 8, -1316856, 759969, -210977, 2389356); - round(re, 10, -3249728, 1653064, -8578, -3724342); - round(re, 12, 3958618, 904516, -1100098, 44288); - round(re, 14, 3097992, 508951, 264944, -3343383); - round(re, 16, -1430430, 1852771, 1349076, -381987); - round(re, 18, -1308169, -22981, -1228525, -671102); - round(re, 20, -2477047, -411027, -3693493, -2967645); - round(re, 22, 2715295, 2147896, -983419, 3412210); - round(re, 24, 126922, -3632928, -3157330, -3190144); - round(re, 26, -1000202, -4083598, 1939314, -1257611); - round(re, 28, -1585221, 2176455, 3475950, -1452451); - round(re, 30, -3041255, -3677745, -1528703, -3930395); -} - -#[inline(always)] -fn invert_ntt_at_layer_2(re: &mut PolynomialRingElement) { - fn round( - re: &mut PolynomialRingElement, - index: usize, - zeta1: i32, - zeta2: i32, - ) { - (re.simd_units[index], re.simd_units[index + 1]) = SIMDUnit::invert_ntt_at_layer_2( - re.simd_units[index], - re.simd_units[index + 1], - zeta1, - zeta2, - ); - } - - round(re, 0, -2797779, 2071892); - round(re, 2, -2556880, 3900724); - round(re, 4, 3881043, 954230); - round(re, 6, 531354, 811944); - round(re, 8, 3699596, -1600420); - round(re, 10, -2140649, 3507263); - round(re, 12, -3821735, 3505694); - round(re, 14, -1643818, -1699267); - round(re, 16, -539299, 2348700); - round(re, 18, -300467, 3539968); - round(re, 20, -2867647, 3574422); - round(re, 22, -3043716, -3861115); - round(re, 24, 3915439, -2537516); - round(re, 26, -3592148, -1661693); - round(re, 28, 3530437, 3077325); - round(re, 30, 95776, 2706023); -} - -#[inline(always)] -fn outer_3_plus< - SIMDUnit: Operations, - const OFFSET: usize, - const STEP_BY: usize, - const ZETA: FieldElementTimesMontgomeryR, ->( - re: &mut PolynomialRingElement, -) { - for j in OFFSET..OFFSET + STEP_BY { - let a_minus_b = SIMDUnit::subtract(&re.simd_units[j + STEP_BY], &re.simd_units[j]); - re.simd_units[j] = SIMDUnit::add(&re.simd_units[j], &re.simd_units[j + STEP_BY]); - re.simd_units[j + STEP_BY] = montgomery_multiply_by_fer(a_minus_b, ZETA); - } - () -} - -#[inline(always)] -fn invert_ntt_at_layer_3(re: &mut PolynomialRingElement) { - const STEP: usize = 8; // 1 << LAYER; - const STEP_BY: usize = 1; // step / COEFFICIENTS_IN_SIMD_UNIT; - - outer_3_plus::(re); - outer_3_plus::(re); - outer_3_plus::(re); - outer_3_plus::(re); - outer_3_plus::(re); - outer_3_plus::(re); - outer_3_plus::(re); - outer_3_plus::(re); - outer_3_plus::(re); - outer_3_plus::(re); - outer_3_plus::( - re, - ); - outer_3_plus::(re); - outer_3_plus::(re); - outer_3_plus::( - re, - ); - outer_3_plus::(re); - outer_3_plus::(re); -} - -#[inline(always)] -fn invert_ntt_at_layer_4(re: &mut PolynomialRingElement) { - const STEP: usize = 16; // 1 << LAYER; - const STEP_BY: usize = 2; // step / COEFFICIENTS_IN_SIMD_UNIT; - - outer_3_plus::(re); - outer_3_plus::(re); - outer_3_plus::(re); - outer_3_plus::(re); - outer_3_plus::(re); - outer_3_plus::(re); - outer_3_plus::(re); - outer_3_plus::(re); -} - -#[inline(always)] -fn invert_ntt_at_layer_5(re: &mut PolynomialRingElement) { - const STEP: usize = 32; // 1 << LAYER; - const STEP_BY: usize = 4; // step / COEFFICIENTS_IN_SIMD_UNIT; - - outer_3_plus::(re); - outer_3_plus::(re); - outer_3_plus::(re); - outer_3_plus::(re); -} - -#[inline(always)] -fn invert_ntt_at_layer_6(re: &mut PolynomialRingElement) { - const STEP: usize = 64; // 1 << LAYER; - const STEP_BY: usize = 8; // step / COEFFICIENTS_IN_SIMD_UNIT; - - outer_3_plus::(re); - outer_3_plus::(re); -} - -#[inline(always)] -fn invert_ntt_at_layer_7(re: &mut PolynomialRingElement) { - const STEP: usize = 128; // 1 << LAYER; - const STEP_BY: usize = 16; // step / COEFFICIENTS_IN_SIMD_UNIT; - - outer_3_plus::(re); -} - #[inline(always)] pub(crate) fn invert_ntt_montgomery( - mut re: PolynomialRingElement, + re: PolynomialRingElement, ) -> PolynomialRingElement { - invert_ntt_at_layer_0(&mut re); - invert_ntt_at_layer_1(&mut re); - invert_ntt_at_layer_2(&mut re); - invert_ntt_at_layer_3(&mut re); - invert_ntt_at_layer_4(&mut re); - invert_ntt_at_layer_5(&mut re); - invert_ntt_at_layer_6(&mut re); - invert_ntt_at_layer_7(&mut re); - - for i in 0..re.simd_units.len() { - // After invert_ntt_at_layer, elements are of the form a * MONTGOMERY_R^{-1} - // we multiply by (MONTGOMERY_R^2) * (1/2^8) mod Q = 41,978 to both: - // - // - Divide the elements by 256 and - // - Convert the elements form montgomery domain to the standard domain. - re.simd_units[i] = SIMDUnit::montgomery_multiply_by_constant(re.simd_units[i], 41_978); + PolynomialRingElement { + simd_units: SIMDUnit::invert_ntt_montgomery(re.simd_units), } - - re } #[inline(always)] diff --git a/libcrux-ml-dsa/src/simd/avx2.rs b/libcrux-ml-dsa/src/simd/avx2.rs index 6c1c9f1c7..d5cda168c 100644 --- a/libcrux-ml-dsa/src/simd/avx2.rs +++ b/libcrux-ml-dsa/src/simd/avx2.rs @@ -2,6 +2,7 @@ use crate::simd::traits::{Operations, SIMD_UNITS_IN_RING_ELEMENT}; mod arithmetic; mod encoding; +mod invntt; mod ntt; mod rejection_sample; mod vector_type; @@ -31,10 +32,7 @@ impl Operations for AVX2SIMDUnit { fn subtract(lhs: &Self, rhs: &Self) -> Self { arithmetic::subtract(lhs.coefficients, rhs.coefficients).into() } - #[inline(always)] - fn montgomery_multiply_by_constant(simd_unit: Self, constant: i32) -> Self { - arithmetic::montgomery_multiply_by_constant(simd_unit.coefficients, constant).into() - } + #[inline(always)] fn montgomery_multiply(lhs: Self, rhs: Self) -> Self { arithmetic::montgomery_multiply(lhs.coefficients, rhs.coefficients).into() @@ -136,75 +134,11 @@ impl Operations for AVX2SIMDUnit { } #[inline(always)] - #[allow(unsafe_code)] - fn invert_ntt_at_layer_0( - simd_unit0: Self, - simd_unit1: Self, - zeta00: i32, - zeta01: i32, - zeta02: i32, - zeta03: i32, - zeta10: i32, - zeta11: i32, - zeta12: i32, - zeta13: i32, - ) -> (Self, Self) { - unsafe { - let (a, b) = ntt::invert_ntt_at_layer_0( - simd_unit0.coefficients, - simd_unit1.coefficients, - zeta00, - zeta01, - zeta02, - zeta03, - zeta10, - zeta11, - zeta12, - zeta13, - ); - (a.into(), b.into()) - } - } - - #[inline(always)] - #[allow(unsafe_code)] - fn invert_ntt_at_layer_1( - simd_unit0: Self, - simd_unit1: Self, - zeta00: i32, - zeta01: i32, - zeta10: i32, - zeta11: i32, - ) -> (Self, Self) { - unsafe { - let (a, b) = ntt::invert_ntt_at_layer_1( - simd_unit0.coefficients, - simd_unit1.coefficients, - zeta00, - zeta01, - zeta10, - zeta11, - ); - (a.into(), b.into()) - } - } - - #[inline(always)] - #[allow(unsafe_code)] - fn invert_ntt_at_layer_2( - simd_unit0: Self, - simd_unit1: Self, - zeta0: i32, - zeta1: i32, - ) -> (Self, Self) { - unsafe { - let (a, b) = ntt::invert_ntt_at_layer_2( - simd_unit0.coefficients, - simd_unit1.coefficients, - zeta0, - zeta1, - ); - (a.into(), b.into()) - } + fn invert_ntt_montgomery( + simd_units: [Self; SIMD_UNITS_IN_RING_ELEMENT], + ) -> [Self; SIMD_UNITS_IN_RING_ELEMENT] { + let result = invntt::invert_ntt_montgomery(simd_units.map(|x| x.coefficients)); + + result.map(|x| x.into()) } } diff --git a/libcrux-ml-dsa/src/simd/avx2/invntt.rs b/libcrux-ml-dsa/src/simd/avx2/invntt.rs new file mode 100644 index 000000000..4d01c7fe1 --- /dev/null +++ b/libcrux-ml-dsa/src/simd/avx2/invntt.rs @@ -0,0 +1,346 @@ +use super::arithmetic; +use crate::simd::traits::{COEFFICIENTS_IN_SIMD_UNIT, SIMD_UNITS_IN_RING_ELEMENT}; + +use libcrux_intrinsics::avx2::*; + +#[inline(always)] +#[allow(unsafe_code)] +pub(crate) fn invert_ntt_montgomery( + mut re: [Vec256; SIMD_UNITS_IN_RING_ELEMENT], +) -> [Vec256; SIMD_UNITS_IN_RING_ELEMENT] { + unsafe { + invert_ntt_at_layer_0(&mut re); + invert_ntt_at_layer_1(&mut re); + invert_ntt_at_layer_2(&mut re); + invert_ntt_at_layer_3(&mut re); + invert_ntt_at_layer_4(&mut re); + invert_ntt_at_layer_5(&mut re); + invert_ntt_at_layer_6(&mut re); + invert_ntt_at_layer_7(&mut re); + } + for i in 0..re.len() { + // After invert_ntt_at_layer, elements are of the form a * MONTGOMERY_R^{-1} + // we multiply by (MONTGOMERY_R^2) * (1/2^8) mod Q = 41,978 to both: + // + // - Divide the elements by 256 and + // - Convert the elements form montgomery domain to the standard domain. + re[i] = arithmetic::montgomery_multiply_by_constant(re[i], 41_978); + } + + re +} + +#[inline(always)] +fn simd_unit_invert_ntt_at_layer_0( + simd_unit0: Vec256, + simd_unit1: Vec256, + zeta00: i32, + zeta01: i32, + zeta02: i32, + zeta03: i32, + zeta10: i32, + zeta11: i32, + zeta12: i32, + zeta13: i32, +) -> (Vec256, Vec256) { + const SHUFFLE: i32 = 0b11_01_10_00; + let a_shuffled = mm256_shuffle_epi32::(simd_unit0); + let b_shuffled = mm256_shuffle_epi32::(simd_unit1); + + let lo_values = mm256_unpacklo_epi64(a_shuffled, b_shuffled); + let hi_values = mm256_unpackhi_epi64(a_shuffled, b_shuffled); + + let sums = arithmetic::add(lo_values, hi_values); + let differences = arithmetic::subtract(hi_values, lo_values); + + let zetas = mm256_set_epi32( + zeta13, zeta12, zeta03, zeta02, zeta11, zeta10, zeta01, zeta00, + ); + let products = arithmetic::montgomery_multiply(differences, zetas); + + let a_shuffled = mm256_unpacklo_epi64(sums, products); + let b_shuffled = mm256_unpackhi_epi64(sums, products); + + let a = mm256_shuffle_epi32::(a_shuffled); + let b = mm256_shuffle_epi32::(b_shuffled); + + (a, b) +} + +#[inline(always)] +fn simd_unit_invert_ntt_at_layer_1( + simd_unit0: Vec256, + simd_unit1: Vec256, + zeta00: i32, + zeta01: i32, + zeta10: i32, + zeta11: i32, +) -> (Vec256, Vec256) { + let lo_values = mm256_unpacklo_epi64(simd_unit0, simd_unit1); + let hi_values = mm256_unpackhi_epi64(simd_unit0, simd_unit1); + + let sums = arithmetic::add(lo_values, hi_values); + let differences = arithmetic::subtract(hi_values, lo_values); + + let zetas = mm256_set_epi32( + zeta11, zeta11, zeta01, zeta01, zeta10, zeta10, zeta00, zeta00, + ); + let products = arithmetic::montgomery_multiply(differences, zetas); + + let a = mm256_unpacklo_epi64(sums, products); + let b = mm256_unpackhi_epi64(sums, products); + + (a, b) +} + +#[inline(always)] +fn simd_unit_invert_ntt_at_layer_2( + simd_unit0: Vec256, + simd_unit1: Vec256, + zeta0: i32, + zeta1: i32, +) -> (Vec256, Vec256) { + let lo_values = mm256_permute2x128_si256::<0x20>(simd_unit0, simd_unit1); + let hi_values = mm256_permute2x128_si256::<0x31>(simd_unit0, simd_unit1); + + let sums = arithmetic::add(lo_values, hi_values); + let differences = arithmetic::subtract(hi_values, lo_values); + + let zetas = mm256_set_epi32(zeta1, zeta1, zeta1, zeta1, zeta0, zeta0, zeta0, zeta0); + let products = arithmetic::montgomery_multiply(differences, zetas); + + let a = mm256_permute2x128_si256::<0x20>(sums, products); + let b = mm256_permute2x128_si256::<0x31>(sums, products); + + (a, b) +} + +#[cfg_attr(not(hax), target_feature(enable = "avx2"))] +#[allow(unsafe_code)] +unsafe fn invert_ntt_at_layer_0(re: &mut [Vec256; SIMD_UNITS_IN_RING_ELEMENT]) { + #[inline(always)] + fn round( + re: &mut [Vec256; SIMD_UNITS_IN_RING_ELEMENT], + index: usize, + zeta00: i32, + zeta01: i32, + zeta02: i32, + zeta03: i32, + zeta10: i32, + zeta11: i32, + zeta12: i32, + zeta13: i32, + ) { + (re[index], re[index + 1]) = simd_unit_invert_ntt_at_layer_0( + re[index], + re[index + 1], + zeta00, + zeta01, + zeta02, + zeta03, + zeta10, + zeta11, + zeta12, + zeta13, + ); + } + + round( + re, 0, 1976782, -846154, 1400424, 3937738, -1362209, -48306, 3919660, -554416, + ); + round( + re, 2, -3545687, 1612842, -976891, 183443, -2286327, -420899, -2235985, -2939036, + ); + round( + re, 4, -3833893, -260646, -1104333, -1667432, 1910376, -1803090, 1723600, -426683, + ); + round( + re, 6, 472078, 1717735, -975884, 2213111, 269760, 3866901, 3523897, -3038916, + ); + round( + re, 8, -1799107, -3694233, 1652634, 810149, 3014001, 1616392, 162844, -3183426, + ); + round( + re, 10, -1207385, 185531, 3369112, 1957272, -164721, 2454455, 2432395, -2013608, + ); + round( + re, 12, -3776993, 594136, -3724270, -2584293, -1846953, -1671176, -2831860, -542412, + ); + round( + re, 14, 3406031, 2235880, 777191, 1500165, -1374803, -2546312, 1917081, -1279661, + ); + round( + re, 16, -1962642, 3306115, 1312455, -451100, -1430225, -3318210, 1237275, -1333058, + ); + round( + re, 18, -1050970, 1903435, 1869119, -2994039, -3548272, 2635921, 1250494, -3767016, + ); + round( + re, 20, 1595974, 2486353, 1247620, 4055324, 1265009, -2590150, 2691481, 2842341, + ); + round( + re, 22, 203044, 1735879, -3342277, 3437287, 4108315, -2437823, 286988, 342297, + ); + round( + re, 24, -3595838, -768622, -525098, -3556995, 3207046, 2031748, -3122442, -655327, + ); + round( + re, 26, -522500, -43260, -1613174, 495491, 819034, 909542, 1859098, 900702, + ); + round( + re, 28, -3193378, -1197226, -3759364, -3520352, 3513181, -1235728, 2434439, 266997, + ); + round( + re, 30, -3562462, -2446433, 2244091, -3342478, 3817976, 2316500, 3407706, 2091667, + ); +} + +#[allow(unsafe_code)] +#[cfg_attr(not(hax), target_feature(enable = "avx2"))] +unsafe fn invert_ntt_at_layer_1(re: &mut [Vec256; SIMD_UNITS_IN_RING_ELEMENT]) { + #[inline(always)] + fn round( + re: &mut [Vec256; SIMD_UNITS_IN_RING_ELEMENT], + index: usize, + zeta_00: i32, + zeta_01: i32, + zeta_10: i32, + zeta_11: i32, + ) { + (re[index], re[index + 1]) = simd_unit_invert_ntt_at_layer_1( + re[index], + re[index + 1], + zeta_00, + zeta_01, + zeta_10, + zeta_11, + ); + } + + round(re, 0, 3839961, -3628969, -3881060, -3019102); + round(re, 2, -1439742, -812732, -1584928, 1285669); + round(re, 4, 1341330, 1315589, -177440, -2409325); + round(re, 6, -1851402, 3159746, -3553272, 189548); + round(re, 8, -1316856, 759969, -210977, 2389356); + round(re, 10, -3249728, 1653064, -8578, -3724342); + round(re, 12, 3958618, 904516, -1100098, 44288); + round(re, 14, 3097992, 508951, 264944, -3343383); + round(re, 16, -1430430, 1852771, 1349076, -381987); + round(re, 18, -1308169, -22981, -1228525, -671102); + round(re, 20, -2477047, -411027, -3693493, -2967645); + round(re, 22, 2715295, 2147896, -983419, 3412210); + round(re, 24, 126922, -3632928, -3157330, -3190144); + round(re, 26, -1000202, -4083598, 1939314, -1257611); + round(re, 28, -1585221, 2176455, 3475950, -1452451); + round(re, 30, -3041255, -3677745, -1528703, -3930395); +} + +#[cfg_attr(not(hax), target_feature(enable = "avx2"))] +#[allow(unsafe_code)] +unsafe fn invert_ntt_at_layer_2(re: &mut [Vec256; SIMD_UNITS_IN_RING_ELEMENT]) { + #[inline(always)] + fn round(re: &mut [Vec256; SIMD_UNITS_IN_RING_ELEMENT], index: usize, zeta1: i32, zeta2: i32) { + (re[index], re[index + 1]) = + simd_unit_invert_ntt_at_layer_2(re[index], re[index + 1], zeta1, zeta2); + } + + round(re, 0, -2797779, 2071892); + round(re, 2, -2556880, 3900724); + round(re, 4, 3881043, 954230); + round(re, 6, 531354, 811944); + round(re, 8, 3699596, -1600420); + round(re, 10, -2140649, 3507263); + round(re, 12, -3821735, 3505694); + round(re, 14, -1643818, -1699267); + round(re, 16, -539299, 2348700); + round(re, 18, -300467, 3539968); + round(re, 20, -2867647, 3574422); + round(re, 22, -3043716, -3861115); + round(re, 24, 3915439, -2537516); + round(re, 26, -3592148, -1661693); + round(re, 28, 3530437, 3077325); + round(re, 30, 95776, 2706023); +} + +#[inline(always)] +fn outer_3_plus( + re: &mut [Vec256; SIMD_UNITS_IN_RING_ELEMENT], +) { + for j in OFFSET..OFFSET + STEP_BY { + let a_minus_b = arithmetic::subtract(re[j + STEP_BY], re[j]); + re[j] = arithmetic::add(re[j], re[j + STEP_BY]); + re[j + STEP_BY] = arithmetic::montgomery_multiply_by_constant(a_minus_b, ZETA); + } + () +} + +#[cfg_attr(not(hax), target_feature(enable = "avx2"))] +#[allow(unsafe_code)] +unsafe fn invert_ntt_at_layer_3(re: &mut [Vec256; SIMD_UNITS_IN_RING_ELEMENT]) { + const STEP: usize = 8; // 1 << LAYER; + const STEP_BY: usize = 1; // step / COEFFICIENTS_IN_SIMD_UNIT; + + outer_3_plus::<{ (0 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 280005>(re); + outer_3_plus::<{ (1 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 4010497>(re); + outer_3_plus::<{ (2 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -19422>(re); + outer_3_plus::<{ (3 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 1757237>(re); + outer_3_plus::<{ (4 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -3277672>(re); + outer_3_plus::<{ (5 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -1399561>(re); + outer_3_plus::<{ (6 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -3859737>(re); + outer_3_plus::<{ (7 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -2118186>(re); + outer_3_plus::<{ (8 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -2108549>(re); + outer_3_plus::<{ (9 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 2619752>(re); + outer_3_plus::<{ (10 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -1119584>(re); + outer_3_plus::<{ (11 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -549488>(re); + outer_3_plus::<{ (12 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 3585928>(re); + outer_3_plus::<{ (13 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -1079900>(re); + outer_3_plus::<{ (14 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 1024112>(re); + outer_3_plus::<{ (15 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 2725464>(re); +} + +#[cfg_attr(not(hax), target_feature(enable = "avx2"))] +#[allow(unsafe_code)] +unsafe fn invert_ntt_at_layer_4(re: &mut [Vec256; SIMD_UNITS_IN_RING_ELEMENT]) { + const STEP: usize = 16; // 1 << LAYER; + const STEP_BY: usize = 2; // step / COEFFICIENTS_IN_SIMD_UNIT; + + outer_3_plus::<{ (0 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 2680103>(re); + outer_3_plus::<{ (1 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 3111497>(re); + outer_3_plus::<{ (2 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -2884855>(re); + outer_3_plus::<{ (3 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 3119733>(re); + outer_3_plus::<{ (4 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -2091905>(re); + outer_3_plus::<{ (5 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -359251>(re); + outer_3_plus::<{ (6 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 2353451>(re); + outer_3_plus::<{ (7 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 1826347>(re); +} + +#[cfg_attr(not(hax), target_feature(enable = "avx2"))] +#[allow(unsafe_code)] +unsafe fn invert_ntt_at_layer_5(re: &mut [Vec256; SIMD_UNITS_IN_RING_ELEMENT]) { + const STEP: usize = 32; // 1 << LAYER; + const STEP_BY: usize = 4; // step / COEFFICIENTS_IN_SIMD_UNIT; + + outer_3_plus::<{ (0 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 466468>(re); + outer_3_plus::<{ (1 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -876248>(re); + outer_3_plus::<{ (2 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -777960>(re); + outer_3_plus::<{ (3 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 237124>(re); +} + +#[cfg_attr(not(hax), target_feature(enable = "avx2"))] +#[allow(unsafe_code)] +unsafe fn invert_ntt_at_layer_6(re: &mut [Vec256; SIMD_UNITS_IN_RING_ELEMENT]) { + const STEP: usize = 64; // 1 << LAYER; + const STEP_BY: usize = 8; // step / COEFFICIENTS_IN_SIMD_UNIT; + + outer_3_plus::<{ (0 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -518909>(re); + outer_3_plus::<{ (1 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -2608894>(re); +} + +#[cfg_attr(not(hax), target_feature(enable = "avx2"))] +#[allow(unsafe_code)] +unsafe fn invert_ntt_at_layer_7(re: &mut [Vec256; SIMD_UNITS_IN_RING_ELEMENT]) { + const STEP: usize = 128; // 1 << LAYER; + const STEP_BY: usize = 16; // step / COEFFICIENTS_IN_SIMD_UNIT; + + outer_3_plus::<{ (0 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 25847>(re); +} diff --git a/libcrux-ml-dsa/src/simd/avx2/ntt.rs b/libcrux-ml-dsa/src/simd/avx2/ntt.rs index 9f167b7f3..799eb0247 100644 --- a/libcrux-ml-dsa/src/simd/avx2/ntt.rs +++ b/libcrux-ml-dsa/src/simd/avx2/ntt.rs @@ -449,87 +449,3 @@ pub(crate) fn ntt( re } - -#[cfg_attr(not(hax), target_feature(enable = "avx2"))] -#[allow(unsafe_code)] -pub(super) unsafe fn invert_ntt_at_layer_0( - simd_unit0: Vec256, - simd_unit1: Vec256, - zeta00: i32, - zeta01: i32, - zeta02: i32, - zeta03: i32, - zeta10: i32, - zeta11: i32, - zeta12: i32, - zeta13: i32, -) -> (Vec256, Vec256) { - const SHUFFLE: i32 = 0b11_01_10_00; - let a_shuffled = mm256_shuffle_epi32::(simd_unit0); - let b_shuffled = mm256_shuffle_epi32::(simd_unit1); - - let lo_values = mm256_unpacklo_epi64(a_shuffled, b_shuffled); - let hi_values = mm256_unpackhi_epi64(a_shuffled, b_shuffled); - - let sums = arithmetic::add(lo_values, hi_values); - let differences = arithmetic::subtract(hi_values, lo_values); - - let zetas = mm256_set_epi32(zeta13, zeta12, zeta03, zeta02,zeta11, zeta10, zeta01, zeta00); - let products = arithmetic::montgomery_multiply(differences, zetas); - - let a_shuffled = mm256_unpacklo_epi64(sums, products); - let b_shuffled = mm256_unpackhi_epi64(sums, products); - - let a = mm256_shuffle_epi32::(a_shuffled); - let b = mm256_shuffle_epi32::(b_shuffled); - - (a, b) -} - -#[cfg_attr(not(hax), target_feature(enable = "avx2"))] -#[allow(unsafe_code)] -pub(super) unsafe fn invert_ntt_at_layer_1( - simd_unit0: Vec256, - simd_unit1: Vec256, - zeta00: i32, - zeta01: i32, - zeta10: i32, - zeta11: i32, -) -> (Vec256, Vec256) { - let lo_values = mm256_unpacklo_epi64(simd_unit0, simd_unit1); - let hi_values = mm256_unpackhi_epi64(simd_unit0, simd_unit1); - - let sums = arithmetic::add(lo_values, hi_values); - let differences = arithmetic::subtract(hi_values, lo_values); - - let zetas = mm256_set_epi32(zeta11, zeta11, zeta01, zeta01,zeta10, zeta10, zeta00, zeta00); - let products = arithmetic::montgomery_multiply(differences, zetas); - - let a = mm256_unpacklo_epi64(sums, products); - let b = mm256_unpackhi_epi64(sums, products); - - (a, b) -} - -#[cfg_attr(not(hax), target_feature(enable = "avx2"))] -#[allow(unsafe_code)] -pub(super) unsafe fn invert_ntt_at_layer_2( - simd_unit0: Vec256, - simd_unit1: Vec256, - zeta0: i32, - zeta1: i32, -) -> (Vec256, Vec256) { - let lo_values = mm256_permute2x128_si256::<0x20>(simd_unit0, simd_unit1); - let hi_values = mm256_permute2x128_si256::<0x31>(simd_unit0, simd_unit1); - - let sums = arithmetic::add(lo_values, hi_values); - let differences = arithmetic::subtract(hi_values, lo_values); - - let zetas = mm256_set_epi32(zeta1, zeta1, zeta1, zeta1,zeta0, zeta0, zeta0, zeta0); - let products = arithmetic::montgomery_multiply(differences, zetas); - - let a = mm256_permute2x128_si256::<0x20>(sums, products); - let b = mm256_permute2x128_si256::<0x31>(sums, products); - - (a,b) -} diff --git a/libcrux-ml-dsa/src/simd/portable.rs b/libcrux-ml-dsa/src/simd/portable.rs index 13ecf85dc..f0c02d10e 100644 --- a/libcrux-ml-dsa/src/simd/portable.rs +++ b/libcrux-ml-dsa/src/simd/portable.rs @@ -5,6 +5,7 @@ mod vector_type; // Some of the portable implementations are used in lieu of vectorized ones in // the AVX2 module. pub(crate) mod encoding; +mod invntt; mod ntt; mod sample; @@ -31,12 +32,10 @@ impl Operations for PortableSIMDUnit { arithmetic::subtract(lhs, rhs) } - fn montgomery_multiply_by_constant(simd_unit: Self, c: i32) -> Self { - arithmetic::montgomery_multiply_by_constant(simd_unit, c) - } fn montgomery_multiply(lhs: Self, rhs: Self) -> Self { arithmetic::montgomery_multiply(&lhs, &rhs) } + fn shift_left_then_reduce(simd_unit: Self) -> Self { arithmetic::shift_left_then_reduce::(simd_unit) } @@ -106,46 +105,9 @@ impl Operations for PortableSIMDUnit { ntt::ntt(simd_units) } - fn invert_ntt_at_layer_0( - simd_unit0: Self, - simd_unit1: Self, - zeta00: i32, - zeta01: i32, - zeta02: i32, - zeta03: i32, - zeta10: i32, - zeta11: i32, - zeta12: i32, - zeta13: i32, - ) -> (Self, Self) { - ( - ntt::invert_ntt_at_layer_0(simd_unit0, zeta00, zeta01, zeta02, zeta03), - ntt::invert_ntt_at_layer_0(simd_unit1, zeta10, zeta11, zeta12, zeta13), - ) - } - fn invert_ntt_at_layer_1( - simd_unit0: Self, - simd_unit1: Self, - zeta00: i32, - zeta01: i32, - zeta10: i32, - zeta11: i32, - ) -> (Self, Self) { - ( - ntt::invert_ntt_at_layer_1(simd_unit0, zeta00, zeta01), - ntt::invert_ntt_at_layer_1(simd_unit1, zeta10, zeta11), - ) - } - - fn invert_ntt_at_layer_2( - simd_unit0: Self, - simd_unit1: Self, - zeta0: i32, - zeta1: i32, - ) -> (Self, Self) { - ( - ntt::invert_ntt_at_layer_2(simd_unit0, zeta0), - ntt::invert_ntt_at_layer_2(simd_unit1, zeta1), - ) + fn invert_ntt_montgomery( + simd_units: [Self; SIMD_UNITS_IN_RING_ELEMENT], + ) -> [Self; SIMD_UNITS_IN_RING_ELEMENT] { + invntt::invert_ntt_montgomery(simd_units) } } diff --git a/libcrux-ml-dsa/src/simd/portable/arithmetic.rs b/libcrux-ml-dsa/src/simd/portable/arithmetic.rs index 51e009243..f4c269470 100644 --- a/libcrux-ml-dsa/src/simd/portable/arithmetic.rs +++ b/libcrux-ml-dsa/src/simd/portable/arithmetic.rs @@ -6,11 +6,6 @@ use crate::{ }, }; -/// If 'x' denotes a value of type `fe`, values having this type hold a -/// representative y ≡ x·MONTGOMERY_R^(-1) (mod FIELD_MODULUS). -/// We use 'mfe' as a shorthand for this type -pub type MontgomeryFieldElement = i32; - pub(crate) const MONTGOMERY_SHIFT: u8 = 32; #[inline(always)] @@ -40,7 +35,7 @@ pub(crate) fn get_n_least_significant_bits(n: u8, value: u64) -> u64 { value & ((1 << n) - 1) } #[inline(always)] -pub(crate) fn montgomery_reduce_element(value: i64) -> MontgomeryFieldElement { +pub(crate) fn montgomery_reduce_element(value: i64) -> FieldElementTimesMontgomeryR { let t = get_n_least_significant_bits(MONTGOMERY_SHIFT, value as u64) * INVERSE_OF_MODULUS_MOD_MONTGOMERY_R; let k = get_n_least_significant_bits(MONTGOMERY_SHIFT, t) as i32; diff --git a/libcrux-ml-dsa/src/simd/portable/invntt.rs b/libcrux-ml-dsa/src/simd/portable/invntt.rs new file mode 100644 index 000000000..2cef94c7f --- /dev/null +++ b/libcrux-ml-dsa/src/simd/portable/invntt.rs @@ -0,0 +1,315 @@ +use super::arithmetic::{self, montgomery_multiply_fe_by_fer}; +use super::vector_type::PortableSIMDUnit; +use crate::simd::traits::{COEFFICIENTS_IN_SIMD_UNIT, SIMD_UNITS_IN_RING_ELEMENT}; + +#[inline(always)] +pub fn simd_unit_invert_ntt_at_layer_0( + mut simd_unit: PortableSIMDUnit, + zeta0: i32, + zeta1: i32, + zeta2: i32, + zeta3: i32, +) -> PortableSIMDUnit { + let a_minus_b = simd_unit.coefficients[1] - simd_unit.coefficients[0]; + simd_unit.coefficients[0] = simd_unit.coefficients[0] + simd_unit.coefficients[1]; + simd_unit.coefficients[1] = montgomery_multiply_fe_by_fer(a_minus_b, zeta0); + + let a_minus_b = simd_unit.coefficients[3] - simd_unit.coefficients[2]; + simd_unit.coefficients[2] = simd_unit.coefficients[2] + simd_unit.coefficients[3]; + simd_unit.coefficients[3] = montgomery_multiply_fe_by_fer(a_minus_b, zeta1); + + let a_minus_b = simd_unit.coefficients[5] - simd_unit.coefficients[4]; + simd_unit.coefficients[4] = simd_unit.coefficients[4] + simd_unit.coefficients[5]; + simd_unit.coefficients[5] = montgomery_multiply_fe_by_fer(a_minus_b, zeta2); + + let a_minus_b = simd_unit.coefficients[7] - simd_unit.coefficients[6]; + simd_unit.coefficients[6] = simd_unit.coefficients[6] + simd_unit.coefficients[7]; + simd_unit.coefficients[7] = montgomery_multiply_fe_by_fer(a_minus_b, zeta3); + + simd_unit +} + +#[inline(always)] +pub fn simd_unit_invert_ntt_at_layer_1( + mut simd_unit: PortableSIMDUnit, + zeta0: i32, + zeta1: i32, +) -> PortableSIMDUnit { + let a_minus_b = simd_unit.coefficients[2] - simd_unit.coefficients[0]; + simd_unit.coefficients[0] = simd_unit.coefficients[0] + simd_unit.coefficients[2]; + simd_unit.coefficients[2] = montgomery_multiply_fe_by_fer(a_minus_b, zeta0); + + let a_minus_b = simd_unit.coefficients[3] - simd_unit.coefficients[1]; + simd_unit.coefficients[1] = simd_unit.coefficients[1] + simd_unit.coefficients[3]; + simd_unit.coefficients[3] = montgomery_multiply_fe_by_fer(a_minus_b, zeta0); + + let a_minus_b = simd_unit.coefficients[6] - simd_unit.coefficients[4]; + simd_unit.coefficients[4] = simd_unit.coefficients[4] + simd_unit.coefficients[6]; + simd_unit.coefficients[6] = montgomery_multiply_fe_by_fer(a_minus_b, zeta1); + + let a_minus_b = simd_unit.coefficients[7] - simd_unit.coefficients[5]; + simd_unit.coefficients[5] = simd_unit.coefficients[5] + simd_unit.coefficients[7]; + simd_unit.coefficients[7] = montgomery_multiply_fe_by_fer(a_minus_b, zeta1); + + simd_unit +} + +#[inline(always)] +pub fn simd_unit_invert_ntt_at_layer_2( + mut simd_unit: PortableSIMDUnit, + zeta: i32, +) -> PortableSIMDUnit { + let a_minus_b = simd_unit.coefficients[4] - simd_unit.coefficients[0]; + simd_unit.coefficients[0] = simd_unit.coefficients[0] + simd_unit.coefficients[4]; + simd_unit.coefficients[4] = montgomery_multiply_fe_by_fer(a_minus_b, zeta); + + let a_minus_b = simd_unit.coefficients[5] - simd_unit.coefficients[1]; + simd_unit.coefficients[1] = simd_unit.coefficients[1] + simd_unit.coefficients[5]; + simd_unit.coefficients[5] = montgomery_multiply_fe_by_fer(a_minus_b, zeta); + + let a_minus_b = simd_unit.coefficients[6] - simd_unit.coefficients[2]; + simd_unit.coefficients[2] = simd_unit.coefficients[2] + simd_unit.coefficients[6]; + simd_unit.coefficients[6] = montgomery_multiply_fe_by_fer(a_minus_b, zeta); + + let a_minus_b = simd_unit.coefficients[7] - simd_unit.coefficients[3]; + simd_unit.coefficients[3] = simd_unit.coefficients[3] + simd_unit.coefficients[7]; + simd_unit.coefficients[7] = montgomery_multiply_fe_by_fer(a_minus_b, zeta); + + simd_unit +} + +#[inline(always)] +fn invert_ntt_at_layer_0(re: &mut [PortableSIMDUnit; SIMD_UNITS_IN_RING_ELEMENT]) { + #[inline(always)] + fn round( + re: &mut [PortableSIMDUnit; SIMD_UNITS_IN_RING_ELEMENT], + index: usize, + zeta0: i32, + zeta1: i32, + zeta2: i32, + zeta3: i32, + ) { + re[index] = simd_unit_invert_ntt_at_layer_0(re[index], zeta0, zeta1, zeta2, zeta3); + } + + round(re, 0, 1976782, -846154, 1400424, 3937738); + round(re, 1, -1362209, -48306, 3919660, -554416); + round(re, 2, -3545687, 1612842, -976891, 183443); + round(re, 3, -2286327, -420899, -2235985, -2939036); + round(re, 4, -3833893, -260646, -1104333, -1667432); + round(re, 5, 1910376, -1803090, 1723600, -426683); + round(re, 6, 472078, 1717735, -975884, 2213111); + round(re, 7, 269760, 3866901, 3523897, -3038916); + round(re, 8, -1799107, -3694233, 1652634, 810149); + round(re, 9, 3014001, 1616392, 162844, -3183426); + round(re, 10, -1207385, 185531, 3369112, 1957272); + round(re, 11, -164721, 2454455, 2432395, -2013608); + round(re, 12, -3776993, 594136, -3724270, -2584293); + round(re, 13, -1846953, -1671176, -2831860, -542412); + round(re, 14, 3406031, 2235880, 777191, 1500165); + round(re, 15, -1374803, -2546312, 1917081, -1279661); + round(re, 16, -1962642, 3306115, 1312455, -451100); + round(re, 17, -1430225, -3318210, 1237275, -1333058); + round(re, 18, -1050970, 1903435, 1869119, -2994039); + round(re, 19, -3548272, 2635921, 1250494, -3767016); + round(re, 20, 1595974, 2486353, 1247620, 4055324); + round(re, 21, 1265009, -2590150, 2691481, 2842341); + round(re, 22, 203044, 1735879, -3342277, 3437287); + round(re, 23, 4108315, -2437823, 286988, 342297); + round(re, 24, -3595838, -768622, -525098, -3556995); + round(re, 25, 3207046, 2031748, -3122442, -655327); + round(re, 26, -522500, -43260, -1613174, 495491); + round(re, 27, 819034, 909542, 1859098, 900702); + round(re, 28, -3193378, -1197226, -3759364, -3520352); + round(re, 29, 3513181, -1235728, 2434439, 266997); + round(re, 30, -3562462, -2446433, 2244091, -3342478); + round(re, 31, 3817976, 2316500, 3407706, 2091667); +} + +#[inline(always)] +fn invert_ntt_at_layer_1(re: &mut [PortableSIMDUnit; SIMD_UNITS_IN_RING_ELEMENT]) { + #[inline(always)] + fn round( + re: &mut [PortableSIMDUnit; SIMD_UNITS_IN_RING_ELEMENT], + index: usize, + zeta_00: i32, + zeta_01: i32, + ) { + re[index] = simd_unit_invert_ntt_at_layer_1(re[index], zeta_00, zeta_01); + } + + round(re, 0, 3839961, -3628969); + round(re, 1, -3881060, -3019102); + round(re, 2, -1439742, -812732); + round(re, 3, -1584928, 1285669); + round(re, 4, 1341330, 1315589); + round(re, 5, -177440, -2409325); + round(re, 6, -1851402, 3159746); + round(re, 7, -3553272, 189548); + round(re, 8, -1316856, 759969); + round(re, 9, -210977, 2389356); + round(re, 10, -3249728, 1653064); + round(re, 11, -8578, -3724342); + round(re, 12, 3958618, 904516); + round(re, 13, -1100098, 44288); + round(re, 14, 3097992, 508951); + round(re, 15, 264944, -3343383); + round(re, 16, -1430430, 1852771); + round(re, 17, 1349076, -381987); + round(re, 18, -1308169, -22981); + round(re, 19, -1228525, -671102); + round(re, 20, -2477047, -411027); + round(re, 21, -3693493, -2967645); + round(re, 22, 2715295, 2147896); + round(re, 23, -983419, 3412210); + round(re, 24, 126922, -3632928); + round(re, 25, -3157330, -3190144); + round(re, 26, -1000202, -4083598); + round(re, 27, 1939314, -1257611); + round(re, 28, -1585221, 2176455); + round(re, 29, 3475950, -1452451); + round(re, 30, -3041255, -3677745); + round(re, 31, -1528703, -3930395); +} + +#[inline(always)] +fn invert_ntt_at_layer_2(re: &mut [PortableSIMDUnit; SIMD_UNITS_IN_RING_ELEMENT]) { + fn round(re: &mut [PortableSIMDUnit; SIMD_UNITS_IN_RING_ELEMENT], index: usize, zeta1: i32) { + re[index] = simd_unit_invert_ntt_at_layer_2(re[index], zeta1); + } + + round(re, 0, -2797779); + round(re, 1, 2071892); + round(re, 2, -2556880); + round(re, 3, 3900724); + round(re, 4, 3881043); + round(re, 5, 954230); + round(re, 6, 531354); + round(re, 7, 811944); + round(re, 8, 3699596); + round(re, 9, -1600420); + round(re, 10, -2140649); + round(re, 11, 3507263); + round(re, 12, -3821735); + round(re, 13, 3505694); + round(re, 14, -1643818); + round(re, 15, -1699267); + round(re, 16, -539299); + round(re, 17, 2348700); + round(re, 18, -300467); + round(re, 19, 3539968); + round(re, 20, -2867647); + round(re, 21, 3574422); + round(re, 22, -3043716); + round(re, 23, -3861115); + round(re, 24, 3915439); + round(re, 25, -2537516); + round(re, 26, -3592148); + round(re, 27, -1661693); + round(re, 28, 3530437); + round(re, 29, 3077325); + round(re, 30, 95776); + round(re, 31, 2706023); +} + +#[inline(always)] +fn outer_3_plus( + re: &mut [PortableSIMDUnit; SIMD_UNITS_IN_RING_ELEMENT], +) { + for j in OFFSET..OFFSET + STEP_BY { + let a_minus_b = arithmetic::subtract(&re[j + STEP_BY], &re[j]); + re[j] = arithmetic::add(&re[j], &re[j + STEP_BY]); + re[j + STEP_BY] = arithmetic::montgomery_multiply_by_constant(a_minus_b, ZETA); + } + () +} + +#[inline(always)] +fn invert_ntt_at_layer_3(re: &mut [PortableSIMDUnit; SIMD_UNITS_IN_RING_ELEMENT]) { + const STEP: usize = 8; // 1 << LAYER; + const STEP_BY: usize = 1; // step / COEFFICIENTS_IN_SIMD_UNIT; + + outer_3_plus::<{ (0 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 280005>(re); + outer_3_plus::<{ (1 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 4010497>(re); + outer_3_plus::<{ (2 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -19422>(re); + outer_3_plus::<{ (3 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 1757237>(re); + outer_3_plus::<{ (4 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -3277672>(re); + outer_3_plus::<{ (5 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -1399561>(re); + outer_3_plus::<{ (6 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -3859737>(re); + outer_3_plus::<{ (7 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -2118186>(re); + outer_3_plus::<{ (8 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -2108549>(re); + outer_3_plus::<{ (9 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 2619752>(re); + outer_3_plus::<{ (10 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -1119584>(re); + outer_3_plus::<{ (11 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -549488>(re); + outer_3_plus::<{ (12 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 3585928>(re); + outer_3_plus::<{ (13 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -1079900>(re); + outer_3_plus::<{ (14 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 1024112>(re); + outer_3_plus::<{ (15 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 2725464>(re); +} + +#[inline(always)] +fn invert_ntt_at_layer_4(re: &mut [PortableSIMDUnit; SIMD_UNITS_IN_RING_ELEMENT]) { + const STEP: usize = 16; // 1 << LAYER; + const STEP_BY: usize = 2; // step / COEFFICIENTS_IN_SIMD_UNIT; + + outer_3_plus::<{ (0 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 2680103>(re); + outer_3_plus::<{ (1 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 3111497>(re); + outer_3_plus::<{ (2 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -2884855>(re); + outer_3_plus::<{ (3 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 3119733>(re); + outer_3_plus::<{ (4 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -2091905>(re); + outer_3_plus::<{ (5 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -359251>(re); + outer_3_plus::<{ (6 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 2353451>(re); + outer_3_plus::<{ (7 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 1826347>(re); +} + +#[inline(always)] +fn invert_ntt_at_layer_5(re: &mut [PortableSIMDUnit; SIMD_UNITS_IN_RING_ELEMENT]) { + const STEP: usize = 32; // 1 << LAYER; + const STEP_BY: usize = 4; // step / COEFFICIENTS_IN_SIMD_UNIT; + + outer_3_plus::<{ (0 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 466468>(re); + outer_3_plus::<{ (1 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -876248>(re); + outer_3_plus::<{ (2 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -777960>(re); + outer_3_plus::<{ (3 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 237124>(re); +} + +#[inline(always)] +fn invert_ntt_at_layer_6(re: &mut [PortableSIMDUnit; SIMD_UNITS_IN_RING_ELEMENT]) { + const STEP: usize = 64; // 1 << LAYER; + const STEP_BY: usize = 8; // step / COEFFICIENTS_IN_SIMD_UNIT; + + outer_3_plus::<{ (0 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -518909>(re); + outer_3_plus::<{ (1 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, -2608894>(re); +} + +#[inline(always)] +fn invert_ntt_at_layer_7(re: &mut [PortableSIMDUnit; SIMD_UNITS_IN_RING_ELEMENT]) { + const STEP: usize = 128; // 1 << LAYER; + const STEP_BY: usize = 16; // step / COEFFICIENTS_IN_SIMD_UNIT; + + outer_3_plus::<{ (0 * STEP * 2) / COEFFICIENTS_IN_SIMD_UNIT }, STEP_BY, 25847>(re); +} + +pub(crate) fn invert_ntt_montgomery( + mut re: [PortableSIMDUnit; SIMD_UNITS_IN_RING_ELEMENT], +) -> [PortableSIMDUnit; SIMD_UNITS_IN_RING_ELEMENT] { + invert_ntt_at_layer_0(&mut re); + invert_ntt_at_layer_1(&mut re); + invert_ntt_at_layer_2(&mut re); + invert_ntt_at_layer_3(&mut re); + invert_ntt_at_layer_4(&mut re); + invert_ntt_at_layer_5(&mut re); + invert_ntt_at_layer_6(&mut re); + invert_ntt_at_layer_7(&mut re); + + for i in 0..re.len() { + // After invert_ntt_at_layer, elements are of the form a * MONTGOMERY_R^{-1} + // we multiply by (MONTGOMERY_R^2) * (1/2^8) mod Q = 41,978 to both: + // + // - Divide the elements by 256 and + // - Convert the elements form montgomery domain to the standard domain. + re[i] = arithmetic::montgomery_multiply_by_constant(re[i], 41_978); + } + + re +} diff --git a/libcrux-ml-dsa/src/simd/portable/ntt.rs b/libcrux-ml-dsa/src/simd/portable/ntt.rs index b074ceffb..c632f3cf8 100644 --- a/libcrux-ml-dsa/src/simd/portable/ntt.rs +++ b/libcrux-ml-dsa/src/simd/portable/ntt.rs @@ -1,6 +1,4 @@ -use super::arithmetic::{ - self, montgomery_multiply_by_constant, montgomery_multiply_fe_by_fer, MontgomeryFieldElement, -}; +use super::arithmetic::{self, montgomery_multiply_by_constant, montgomery_multiply_fe_by_fer}; use super::vector_type::PortableSIMDUnit; use crate::simd::traits::{COEFFICIENTS_IN_SIMD_UNIT, SIMD_UNITS_IN_RING_ELEMENT}; @@ -77,79 +75,6 @@ pub fn simd_unit_ntt_at_layer_2(mut simd_unit: PortableSIMDUnit, zeta: i32) -> P simd_unit } -#[inline(always)] -pub fn invert_ntt_at_layer_0( - mut simd_unit: PortableSIMDUnit, - zeta0: i32, - zeta1: i32, - zeta2: i32, - zeta3: i32, -) -> PortableSIMDUnit { - let a_minus_b = simd_unit.coefficients[1] - simd_unit.coefficients[0]; - simd_unit.coefficients[0] = simd_unit.coefficients[0] + simd_unit.coefficients[1]; - simd_unit.coefficients[1] = montgomery_multiply_fe_by_fer(a_minus_b, zeta0); - - let a_minus_b = simd_unit.coefficients[3] - simd_unit.coefficients[2]; - simd_unit.coefficients[2] = simd_unit.coefficients[2] + simd_unit.coefficients[3]; - simd_unit.coefficients[3] = montgomery_multiply_fe_by_fer(a_minus_b, zeta1); - - let a_minus_b = simd_unit.coefficients[5] - simd_unit.coefficients[4]; - simd_unit.coefficients[4] = simd_unit.coefficients[4] + simd_unit.coefficients[5]; - simd_unit.coefficients[5] = montgomery_multiply_fe_by_fer(a_minus_b, zeta2); - - let a_minus_b = simd_unit.coefficients[7] - simd_unit.coefficients[6]; - simd_unit.coefficients[6] = simd_unit.coefficients[6] + simd_unit.coefficients[7]; - simd_unit.coefficients[7] = montgomery_multiply_fe_by_fer(a_minus_b, zeta3); - - simd_unit -} - -#[inline(always)] -pub fn invert_ntt_at_layer_1( - mut simd_unit: PortableSIMDUnit, - zeta0: i32, - zeta1: i32, -) -> PortableSIMDUnit { - let a_minus_b = simd_unit.coefficients[2] - simd_unit.coefficients[0]; - simd_unit.coefficients[0] = simd_unit.coefficients[0] + simd_unit.coefficients[2]; - simd_unit.coefficients[2] = montgomery_multiply_fe_by_fer(a_minus_b, zeta0); - - let a_minus_b = simd_unit.coefficients[3] - simd_unit.coefficients[1]; - simd_unit.coefficients[1] = simd_unit.coefficients[1] + simd_unit.coefficients[3]; - simd_unit.coefficients[3] = montgomery_multiply_fe_by_fer(a_minus_b, zeta0); - - let a_minus_b = simd_unit.coefficients[6] - simd_unit.coefficients[4]; - simd_unit.coefficients[4] = simd_unit.coefficients[4] + simd_unit.coefficients[6]; - simd_unit.coefficients[6] = montgomery_multiply_fe_by_fer(a_minus_b, zeta1); - - let a_minus_b = simd_unit.coefficients[7] - simd_unit.coefficients[5]; - simd_unit.coefficients[5] = simd_unit.coefficients[5] + simd_unit.coefficients[7]; - simd_unit.coefficients[7] = montgomery_multiply_fe_by_fer(a_minus_b, zeta1); - - simd_unit -} - -#[inline(always)] -pub fn invert_ntt_at_layer_2(mut simd_unit: PortableSIMDUnit, zeta: i32) -> PortableSIMDUnit { - let a_minus_b = simd_unit.coefficients[4] - simd_unit.coefficients[0]; - simd_unit.coefficients[0] = simd_unit.coefficients[0] + simd_unit.coefficients[4]; - simd_unit.coefficients[4] = montgomery_multiply_fe_by_fer(a_minus_b, zeta); - - let a_minus_b = simd_unit.coefficients[5] - simd_unit.coefficients[1]; - simd_unit.coefficients[1] = simd_unit.coefficients[1] + simd_unit.coefficients[5]; - simd_unit.coefficients[5] = montgomery_multiply_fe_by_fer(a_minus_b, zeta); - - let a_minus_b = simd_unit.coefficients[6] - simd_unit.coefficients[2]; - simd_unit.coefficients[2] = simd_unit.coefficients[2] + simd_unit.coefficients[6]; - simd_unit.coefficients[6] = montgomery_multiply_fe_by_fer(a_minus_b, zeta); - - let a_minus_b = simd_unit.coefficients[7] - simd_unit.coefficients[3]; - simd_unit.coefficients[3] = simd_unit.coefficients[3] + simd_unit.coefficients[7]; - simd_unit.coefficients[7] = montgomery_multiply_fe_by_fer(a_minus_b, zeta); - - simd_unit -} - #[inline(always)] fn ntt_at_layer_0(re: &mut [PortableSIMDUnit; SIMD_UNITS_IN_RING_ELEMENT]) { #[inline(always)] @@ -286,7 +211,7 @@ fn ntt_at_layer_2(re: &mut [PortableSIMDUnit; SIMD_UNITS_IN_RING_ELEMENT]) { } #[inline(always)] -fn outer_3_plus( +fn outer_3_plus( re: &mut [PortableSIMDUnit; SIMD_UNITS_IN_RING_ELEMENT], ) { for j in OFFSET..OFFSET + STEP_BY { diff --git a/libcrux-ml-dsa/src/simd/traits.rs b/libcrux-ml-dsa/src/simd/traits.rs index a5cf98c00..d851dab1a 100644 --- a/libcrux-ml-dsa/src/simd/traits.rs +++ b/libcrux-ml-dsa/src/simd/traits.rs @@ -31,7 +31,6 @@ pub(crate) trait Operations: Copy + Clone { // Modular operations fn montgomery_multiply(lhs: Self, rhs: Self) -> Self; - fn montgomery_multiply_by_constant(simd_unit: Self, c: i32) -> Self; fn shift_left_then_reduce(simd_unit: Self) -> Self; // Decomposition operations @@ -75,37 +74,8 @@ pub(crate) trait Operations: Copy + Clone { // NTT fn ntt(simd_units: [Self; SIMD_UNITS_IN_RING_ELEMENT]) -> [Self; SIMD_UNITS_IN_RING_ELEMENT]; - // Inverse NTT - fn invert_ntt_at_layer_0( - simd_unit0: Self, - simd_unit1: Self, - zeta00: i32, - zeta01: i32, - zeta02: i32, - zeta03: i32, - zeta10: i32, - zeta11: i32, - zeta12: i32, - zeta13: i32, - ) -> (Self, Self); - fn invert_ntt_at_layer_1( - simd_unit0: Self, - simd_unit1: Self, - zeta00: i32, - zeta01: i32, - zeta10: i32, - zeta11: i32, - ) -> (Self, Self); - fn invert_ntt_at_layer_2( - simd_unit0: Self, - simd_unit1: Self, - zeta0: i32, - zeta1: i32, - ) -> (Self, Self); -} - -// hax does not support trait with default implementations, so we use the -// following pattern. -pub fn montgomery_multiply_by_fer(simd_unit: S, fer: i32) -> S { - S::montgomery_multiply_by_constant(simd_unit, fer) + // invert NTT and convert to standard domain + fn invert_ntt_montgomery( + simd_units: [Self; SIMD_UNITS_IN_RING_ELEMENT], + ) -> [Self; SIMD_UNITS_IN_RING_ELEMENT]; } From 6982493caf7d3b35640b9c8354a98e93f17c2a35 Mon Sep 17 00:00:00 2001 From: Jonas Schneider-Bensch Date: Thu, 14 Nov 2024 12:02:05 +0100 Subject: [PATCH 3/5] FStar Update --- .../fstar/extraction/Libcrux_ml_dsa.Ntt.fst | 727 +----------------- .../fstar/extraction/Libcrux_ml_dsa.Ntt.fsti | 126 +-- .../Libcrux_ml_dsa.Simd.Avx2.Ntt.fst | 38 +- .../Libcrux_ml_dsa.Simd.Avx2.Ntt.fsti | 9 - .../extraction/Libcrux_ml_dsa.Simd.Avx2.fsti | 138 +--- .../Libcrux_ml_dsa.Simd.Portable.Ntt.fst | 434 ----------- .../Libcrux_ml_dsa.Simd.Portable.Ntt.fsti | 21 - .../Libcrux_ml_dsa.Simd.Portable.fsti | 86 +-- .../Libcrux_ml_dsa.Simd.Traits.fsti | 33 +- 9 files changed, 54 insertions(+), 1558 deletions(-) diff --git a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Ntt.fst b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Ntt.fst index 25a5d7d91..05275542e 100644 --- a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Ntt.fst +++ b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Ntt.fst @@ -9,459 +9,22 @@ let _ = let open Libcrux_ml_dsa.Simd.Traits in () -let invert_ntt_at_layer_0___round - (#v_SIMDUnit: Type0) - (#[FStar.Tactics.Typeclasses.tcresolve ()] - i1: - Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit) - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - (index: usize) - (zeta_0_ zeta_1_ zeta_2_ zeta_3_: i32) - = - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - { - re with - Libcrux_ml_dsa.Polynomial.f_simd_units - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re - .Libcrux_ml_dsa.Polynomial.f_simd_units - index - (Libcrux_ml_dsa.Simd.Traits.f_invert_ntt_at_layer_0_ #v_SIMDUnit - #FStar.Tactics.Typeclasses.solve - (re.Libcrux_ml_dsa.Polynomial.f_simd_units.[ index ] <: v_SIMDUnit) - zeta_0_ - zeta_1_ - zeta_2_ - zeta_3_ - <: - v_SIMDUnit) - } - <: - Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit - in - re - -let invert_ntt_at_layer_0_ - (#v_SIMDUnit: Type0) - (#[FStar.Tactics.Typeclasses.tcresolve ()] - i1: - Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit) - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - = - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 0) 1976782l (-846154l) 1400424l 3937738l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 1) (-1362209l) (-48306l) 3919660l (-554416l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 2) (-3545687l) 1612842l (-976891l) 183443l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit - re - (sz 3) - (-2286327l) - (-420899l) - (-2235985l) - (-2939036l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit - re - (sz 4) - (-3833893l) - (-260646l) - (-1104333l) - (-1667432l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 5) 1910376l (-1803090l) 1723600l (-426683l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 6) 472078l 1717735l (-975884l) 2213111l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 7) 269760l 3866901l 3523897l (-3038916l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 8) (-1799107l) (-3694233l) 1652634l 810149l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 9) 3014001l 1616392l 162844l (-3183426l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 10) (-1207385l) 185531l 3369112l 1957272l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 11) (-164721l) 2454455l 2432395l (-2013608l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 12) (-3776993l) 594136l (-3724270l) (-2584293l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit - re - (sz 13) - (-1846953l) - (-1671176l) - (-2831860l) - (-542412l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 14) 3406031l 2235880l 777191l 1500165l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit - re - (sz 15) - (-1374803l) - (-2546312l) - 1917081l - (-1279661l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 16) (-1962642l) 3306115l 1312455l (-451100l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit - re - (sz 17) - (-1430225l) - (-3318210l) - 1237275l - (-1333058l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 18) (-1050970l) 1903435l 1869119l (-2994039l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 19) (-3548272l) 2635921l 1250494l (-3767016l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 20) 1595974l 2486353l 1247620l 4055324l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 21) 1265009l (-2590150l) 2691481l 2842341l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 22) 203044l 1735879l (-3342277l) 3437287l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 23) 4108315l (-2437823l) 286988l 342297l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit - re - (sz 24) - (-3595838l) - (-768622l) - (-525098l) - (-3556995l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 25) 3207046l 2031748l (-3122442l) (-655327l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 26) (-522500l) (-43260l) (-1613174l) 495491l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 27) 819034l 909542l 1859098l 900702l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit - re - (sz 28) - (-3193378l) - (-1197226l) - (-3759364l) - (-3520352l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 29) 3513181l (-1235728l) 2434439l 266997l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit - re - (sz 30) - (-3562462l) - (-2446433l) - 2244091l - (-3342478l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0___round #v_SIMDUnit re (sz 31) 3817976l 2316500l 3407706l 2091667l - in - re - -let invert_ntt_at_layer_1___round - (#v_SIMDUnit: Type0) - (#[FStar.Tactics.Typeclasses.tcresolve ()] - i1: - Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit) - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - (index: usize) - (zeta_0_ zeta_1_: i32) - = - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - { - re with - Libcrux_ml_dsa.Polynomial.f_simd_units - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re - .Libcrux_ml_dsa.Polynomial.f_simd_units - index - (Libcrux_ml_dsa.Simd.Traits.f_invert_ntt_at_layer_1_ #v_SIMDUnit - #FStar.Tactics.Typeclasses.solve - (re.Libcrux_ml_dsa.Polynomial.f_simd_units.[ index ] <: v_SIMDUnit) - zeta_0_ - zeta_1_ - <: - v_SIMDUnit) - } - <: - Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit - in - re - -let invert_ntt_at_layer_1_ - (#v_SIMDUnit: Type0) - (#[FStar.Tactics.Typeclasses.tcresolve ()] - i1: - Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit) - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - = - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 0) 3839961l (-3628969l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 1) (-3881060l) (-3019102l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 2) (-1439742l) (-812732l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 3) (-1584928l) 1285669l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 4) 1341330l 1315589l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 5) (-177440l) (-2409325l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 6) (-1851402l) 3159746l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 7) (-3553272l) 189548l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 8) (-1316856l) 759969l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 9) (-210977l) 2389356l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 10) (-3249728l) 1653064l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 11) (-8578l) (-3724342l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 12) 3958618l 904516l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 13) (-1100098l) 44288l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 14) 3097992l 508951l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 15) 264944l (-3343383l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 16) (-1430430l) 1852771l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 17) 1349076l (-381987l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 18) (-1308169l) (-22981l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 19) (-1228525l) (-671102l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 20) (-2477047l) (-411027l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 21) (-3693493l) (-2967645l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 22) 2715295l 2147896l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 23) (-983419l) 3412210l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 24) 126922l (-3632928l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 25) (-3157330l) (-3190144l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 26) (-1000202l) (-4083598l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 27) 1939314l (-1257611l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 28) (-1585221l) 2176455l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 29) 3475950l (-1452451l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 30) (-3041255l) (-3677745l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1___round #v_SIMDUnit re (sz 31) (-1528703l) (-3930395l) - in - re - -let invert_ntt_at_layer_2___round - (#v_SIMDUnit: Type0) - (#[FStar.Tactics.Typeclasses.tcresolve ()] - i1: - Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit) - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - (index: usize) - (zeta: i32) - = - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - { - re with - Libcrux_ml_dsa.Polynomial.f_simd_units - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re - .Libcrux_ml_dsa.Polynomial.f_simd_units - index - (Libcrux_ml_dsa.Simd.Traits.f_invert_ntt_at_layer_2_ #v_SIMDUnit - #FStar.Tactics.Typeclasses.solve - (re.Libcrux_ml_dsa.Polynomial.f_simd_units.[ index ] <: v_SIMDUnit) - zeta - <: - v_SIMDUnit) - } - <: - Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit - in - re - -let invert_ntt_at_layer_2_ +let invert_ntt_montgomery (#v_SIMDUnit: Type0) (#[FStar.Tactics.Typeclasses.tcresolve ()] i1: Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit) (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) = - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 0) (-2797779l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 1) 2071892l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 2) (-2556880l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 3) 3900724l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 4) 3881043l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 5) 954230l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 6) 531354l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 7) 811944l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 8) 3699596l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 9) (-1600420l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 10) (-2140649l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 11) 3507263l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 12) (-3821735l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 13) 3505694l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 14) (-1643818l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 15) (-1699267l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 16) (-539299l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 17) 2348700l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 18) (-300467l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 19) 3539968l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 20) (-2867647l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 21) 3574422l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 22) (-3043716l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 23) (-3861115l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 24) 3915439l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 25) (-2537516l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 26) (-3592148l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 27) (-1661693l) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 28) 3530437l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 29) 3077325l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 30) 95776l - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2___round #v_SIMDUnit re (sz 31) 2706023l - in - re + { + Libcrux_ml_dsa.Polynomial.f_simd_units + = + Libcrux_ml_dsa.Simd.Traits.f_invert_ntt_montgomery #v_SIMDUnit + #FStar.Tactics.Typeclasses.solve + re.Libcrux_ml_dsa.Polynomial.f_simd_units + } + <: + Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit let ntt (#v_SIMDUnit: Type0) @@ -480,276 +43,6 @@ let ntt <: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit -let outer_3_plus - (#v_SIMDUnit: Type0) - (v_OFFSET v_STEP_BY: usize) - (v_ZETA: i32) - (#[FStar.Tactics.Typeclasses.tcresolve ()] - i1: - Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit) - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - = - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - Rust_primitives.Hax.Folds.fold_range v_OFFSET - (v_OFFSET +! v_STEP_BY <: usize) - (fun re temp_1_ -> - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = re in - let _:usize = temp_1_ in - true) - re - (fun re j -> - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = re in - let j:usize = j in - let a_minus_b:v_SIMDUnit = - Libcrux_ml_dsa.Simd.Traits.f_subtract #v_SIMDUnit - #FStar.Tactics.Typeclasses.solve - (re.Libcrux_ml_dsa.Polynomial.f_simd_units.[ j +! v_STEP_BY <: usize ] <: v_SIMDUnit) - (re.Libcrux_ml_dsa.Polynomial.f_simd_units.[ j ] <: v_SIMDUnit) - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - { - re with - Libcrux_ml_dsa.Polynomial.f_simd_units - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re - .Libcrux_ml_dsa.Polynomial.f_simd_units - j - (Libcrux_ml_dsa.Simd.Traits.f_add #v_SIMDUnit - #FStar.Tactics.Typeclasses.solve - (re.Libcrux_ml_dsa.Polynomial.f_simd_units.[ j ] <: v_SIMDUnit) - (re.Libcrux_ml_dsa.Polynomial.f_simd_units.[ j +! v_STEP_BY <: usize ] - <: - v_SIMDUnit) - <: - v_SIMDUnit) - } - <: - Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - { - re with - Libcrux_ml_dsa.Polynomial.f_simd_units - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re - .Libcrux_ml_dsa.Polynomial.f_simd_units - (j +! v_STEP_BY <: usize) - (Libcrux_ml_dsa.Simd.Traits.montgomery_multiply_by_fer #v_SIMDUnit a_minus_b v_ZETA - <: - v_SIMDUnit) - } - <: - Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit - in - re) - in - let hax_temp_output:Prims.unit = () <: Prims.unit in - re - -let invert_ntt_at_layer_3_ - (#v_SIMDUnit: Type0) - (#[FStar.Tactics.Typeclasses.tcresolve ()] - i1: - Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit) - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - = - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 0) (sz 1) 280005l re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 2) (sz 1) 4010497l re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 4) (sz 1) (-19422l) re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 6) (sz 1) 1757237l re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 8) (sz 1) (-3277672l) re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 10) (sz 1) (-1399561l) re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 12) (sz 1) (-3859737l) re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 14) (sz 1) (-2118186l) re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 16) (sz 1) (-2108549l) re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 18) (sz 1) 2619752l re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 20) (sz 1) (-1119584l) re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 22) (sz 1) (-549488l) re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 24) (sz 1) 3585928l re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 26) (sz 1) (-1079900l) re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 28) (sz 1) 1024112l re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 30) (sz 1) 2725464l re - in - re - -let invert_ntt_at_layer_4_ - (#v_SIMDUnit: Type0) - (#[FStar.Tactics.Typeclasses.tcresolve ()] - i1: - Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit) - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - = - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 0) (sz 2) 2680103l re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 4) (sz 2) 3111497l re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 8) (sz 2) (-2884855l) re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 12) (sz 2) 3119733l re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 16) (sz 2) (-2091905l) re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 20) (sz 2) (-359251l) re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 24) (sz 2) 2353451l re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 28) (sz 2) 1826347l re - in - re - -let invert_ntt_at_layer_5_ - (#v_SIMDUnit: Type0) - (#[FStar.Tactics.Typeclasses.tcresolve ()] - i1: - Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit) - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - = - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 0) (sz 4) 466468l re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 8) (sz 4) (-876248l) re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 16) (sz 4) (-777960l) re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 24) (sz 4) 237124l re - in - re - -let invert_ntt_at_layer_6_ - (#v_SIMDUnit: Type0) - (#[FStar.Tactics.Typeclasses.tcresolve ()] - i1: - Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit) - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - = - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 0) (sz 8) (-518909l) re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 16) (sz 8) (-2608894l) re - in - re - -let invert_ntt_at_layer_7_ - (#v_SIMDUnit: Type0) - (#[FStar.Tactics.Typeclasses.tcresolve ()] - i1: - Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit) - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - = - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - outer_3_plus #v_SIMDUnit (sz 0) (sz 16) 25847l re - in - re - -let invert_ntt_montgomery - (#v_SIMDUnit: Type0) - (#[FStar.Tactics.Typeclasses.tcresolve ()] - i1: - Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit) - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - = - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_0_ #v_SIMDUnit re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_1_ #v_SIMDUnit re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_2_ #v_SIMDUnit re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_3_ #v_SIMDUnit re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_4_ #v_SIMDUnit re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_5_ #v_SIMDUnit re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_6_ #v_SIMDUnit re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - invert_ntt_at_layer_7_ #v_SIMDUnit re - in - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = - Rust_primitives.Hax.Folds.fold_range (sz 0) - (Core.Slice.impl__len #v_SIMDUnit - (re.Libcrux_ml_dsa.Polynomial.f_simd_units <: t_Slice v_SIMDUnit) - <: - usize) - (fun re temp_1_ -> - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = re in - let _:usize = temp_1_ in - true) - re - (fun re i -> - let re:Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit = re in - let i:usize = i in - { - re with - Libcrux_ml_dsa.Polynomial.f_simd_units - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re - .Libcrux_ml_dsa.Polynomial.f_simd_units - i - (Libcrux_ml_dsa.Simd.Traits.f_montgomery_multiply_by_constant #v_SIMDUnit - #FStar.Tactics.Typeclasses.solve - (re.Libcrux_ml_dsa.Polynomial.f_simd_units.[ i ] <: v_SIMDUnit) - 41978l - <: - v_SIMDUnit) - <: - t_Array v_SIMDUnit (sz 32) - } - <: - Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - in - re - let ntt_multiply_montgomery (#v_SIMDUnit: Type0) (#[FStar.Tactics.Typeclasses.tcresolve ()] diff --git a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Ntt.fsti b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Ntt.fsti index 15b336a66..1c6b919dc 100644 --- a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Ntt.fsti +++ b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Ntt.fsti @@ -9,73 +9,7 @@ let _ = let open Libcrux_ml_dsa.Simd.Traits in () -let invert_ntt_at_layer_3___STEP: usize = sz 8 - -let invert_ntt_at_layer_3___STEP_BY: usize = sz 1 - -let invert_ntt_at_layer_4___STEP: usize = sz 16 - -let invert_ntt_at_layer_4___STEP_BY: usize = sz 2 - -let invert_ntt_at_layer_5___STEP: usize = sz 32 - -let invert_ntt_at_layer_5___STEP_BY: usize = sz 4 - -let invert_ntt_at_layer_6___STEP: usize = sz 64 - -let invert_ntt_at_layer_6___STEP_BY: usize = sz 8 - -let invert_ntt_at_layer_7___STEP: usize = sz 128 - -let invert_ntt_at_layer_7___STEP_BY: usize = sz 16 - -val invert_ntt_at_layer_0___round - (#v_SIMDUnit: Type0) - {| i1: Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit |} - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - (index: usize) - (zeta_0_ zeta_1_ zeta_2_ zeta_3_: i32) - : Prims.Pure (Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - Prims.l_True - (fun _ -> Prims.l_True) - -val invert_ntt_at_layer_0_ - (#v_SIMDUnit: Type0) - {| i1: Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit |} - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - : Prims.Pure (Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - Prims.l_True - (fun _ -> Prims.l_True) - -val invert_ntt_at_layer_1___round - (#v_SIMDUnit: Type0) - {| i1: Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit |} - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - (index: usize) - (zeta_0_ zeta_1_: i32) - : Prims.Pure (Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - Prims.l_True - (fun _ -> Prims.l_True) - -val invert_ntt_at_layer_1_ - (#v_SIMDUnit: Type0) - {| i1: Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit |} - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - : Prims.Pure (Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - Prims.l_True - (fun _ -> Prims.l_True) - -val invert_ntt_at_layer_2___round - (#v_SIMDUnit: Type0) - {| i1: Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit |} - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - (index: usize) - (zeta: i32) - : Prims.Pure (Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - Prims.l_True - (fun _ -> Prims.l_True) - -val invert_ntt_at_layer_2_ +val invert_ntt_montgomery (#v_SIMDUnit: Type0) {| i1: Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit |} (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) @@ -91,64 +25,6 @@ val ntt Prims.l_True (fun _ -> Prims.l_True) -val outer_3_plus - (#v_SIMDUnit: Type0) - (v_OFFSET v_STEP_BY: usize) - (v_ZETA: i32) - {| i1: Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit |} - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - : Prims.Pure (Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - Prims.l_True - (fun _ -> Prims.l_True) - -val invert_ntt_at_layer_3_ - (#v_SIMDUnit: Type0) - {| i1: Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit |} - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - : Prims.Pure (Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - Prims.l_True - (fun _ -> Prims.l_True) - -val invert_ntt_at_layer_4_ - (#v_SIMDUnit: Type0) - {| i1: Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit |} - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - : Prims.Pure (Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - Prims.l_True - (fun _ -> Prims.l_True) - -val invert_ntt_at_layer_5_ - (#v_SIMDUnit: Type0) - {| i1: Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit |} - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - : Prims.Pure (Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - Prims.l_True - (fun _ -> Prims.l_True) - -val invert_ntt_at_layer_6_ - (#v_SIMDUnit: Type0) - {| i1: Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit |} - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - : Prims.Pure (Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - Prims.l_True - (fun _ -> Prims.l_True) - -val invert_ntt_at_layer_7_ - (#v_SIMDUnit: Type0) - {| i1: Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit |} - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - : Prims.Pure (Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - Prims.l_True - (fun _ -> Prims.l_True) - -val invert_ntt_montgomery - (#v_SIMDUnit: Type0) - {| i1: Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit |} - (re: Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - : Prims.Pure (Libcrux_ml_dsa.Polynomial.t_PolynomialRingElement v_SIMDUnit) - Prims.l_True - (fun _ -> Prims.l_True) - val ntt_multiply_montgomery (#v_SIMDUnit: Type0) {| i1: Libcrux_ml_dsa.Simd.Traits.t_Operations v_SIMDUnit |} diff --git a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Avx2.Ntt.fst b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Avx2.Ntt.fst index 72db9fe4d..02add82bf 100644 --- a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Avx2.Ntt.fst +++ b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Avx2.Ntt.fst @@ -126,43 +126,6 @@ let butterfly_8_ (a b: u8) (zeta0 zeta1: i32) = let b_out:u8 = Libcrux_intrinsics.Avx2_extract.mm256_permute2x128_si256 19l sub_terms add_terms in a_out, b_out <: (u8 & u8) -let invert_ntt_at_layer_0_ (simd_unit: u8) (zeta0 zeta1 zeta2 zeta3: i32) = - let zetas:u8 = - Libcrux_intrinsics.Avx2_extract.mm256_set_epi32 zeta3 0l zeta2 0l zeta1 0l zeta0 0l - in - let add_by_signs:u8 = - Libcrux_intrinsics.Avx2_extract.mm256_set_epi32 (-1l) 1l (-1l) 1l (-1l) 1l (-1l) 1l - in - let add_by:u8 = Libcrux_intrinsics.Avx2_extract.mm256_shuffle_epi32 177l simd_unit in - let add_by:u8 = Libcrux_intrinsics.Avx2_extract.mm256_mullo_epi32 add_by add_by_signs in - let sums:u8 = Libcrux_intrinsics.Avx2_extract.mm256_add_epi32 simd_unit add_by in - let products:u8 = Libcrux_ml_dsa.Simd.Avx2.Arithmetic.montgomery_multiply sums zetas in - Libcrux_intrinsics.Avx2_extract.mm256_blend_epi32 170l sums products - -let invert_ntt_at_layer_1_ (simd_unit: u8) (zeta0 zeta1: i32) = - let zetas:u8 = - Libcrux_intrinsics.Avx2_extract.mm256_set_epi32 zeta1 zeta1 0l 0l zeta0 zeta0 0l 0l - in - let add_by_signs:u8 = - Libcrux_intrinsics.Avx2_extract.mm256_set_epi32 (-1l) (-1l) 1l 1l (-1l) (-1l) 1l 1l - in - let add_by:u8 = Libcrux_intrinsics.Avx2_extract.mm256_shuffle_epi32 78l simd_unit in - let add_by:u8 = Libcrux_intrinsics.Avx2_extract.mm256_mullo_epi32 add_by add_by_signs in - let sums:u8 = Libcrux_intrinsics.Avx2_extract.mm256_add_epi32 simd_unit add_by in - let products:u8 = Libcrux_ml_dsa.Simd.Avx2.Arithmetic.montgomery_multiply sums zetas in - Libcrux_intrinsics.Avx2_extract.mm256_blend_epi32 204l sums products - -let invert_ntt_at_layer_2_ (simd_unit: u8) (zeta: i32) = - let zetas:u8 = Libcrux_intrinsics.Avx2_extract.mm256_set_epi32 zeta zeta zeta zeta 0l 0l 0l 0l in - let add_by_signs:u8 = - Libcrux_intrinsics.Avx2_extract.mm256_set_epi32 (-1l) (-1l) (-1l) (-1l) 1l 1l 1l 1l - in - let add_by:u8 = Libcrux_intrinsics.Avx2_extract.mm256_permute4x64_epi64 78l simd_unit in - let add_by:u8 = Libcrux_intrinsics.Avx2_extract.mm256_mullo_epi32 add_by add_by_signs in - let sums:u8 = Libcrux_intrinsics.Avx2_extract.mm256_add_epi32 simd_unit add_by in - let products:u8 = Libcrux_ml_dsa.Simd.Avx2.Arithmetic.montgomery_multiply sums zetas in - Libcrux_intrinsics.Avx2_extract.mm256_blend_epi32 240l sums products - let ntt_at_layer_0___round (re: t_Array u8 (sz 32)) (index: usize) @@ -714,4 +677,5 @@ let ntt (re: t_Array u8 (sz 32)) = let re:t_Array u8 (sz 32) = ntt_at_layer_2_ re in let re:t_Array u8 (sz 32) = ntt_at_layer_1_ re in let re:t_Array u8 (sz 32) = ntt_at_layer_0_ re in + let _:Prims.unit = () in re diff --git a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Avx2.Ntt.fsti b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Avx2.Ntt.fsti index b0253f5ed..bb33358bf 100644 --- a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Avx2.Ntt.fsti +++ b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Avx2.Ntt.fsti @@ -43,15 +43,6 @@ val butterfly_4_ (a b: u8) (zeta_a0 zeta_a1 zeta_b0 zeta_b1: i32) val butterfly_8_ (a b: u8) (zeta0 zeta1: i32) : Prims.Pure (u8 & u8) Prims.l_True (fun _ -> Prims.l_True) -val invert_ntt_at_layer_0_ (simd_unit: u8) (zeta0 zeta1 zeta2 zeta3: i32) - : Prims.Pure u8 Prims.l_True (fun _ -> Prims.l_True) - -val invert_ntt_at_layer_1_ (simd_unit: u8) (zeta0 zeta1: i32) - : Prims.Pure u8 Prims.l_True (fun _ -> Prims.l_True) - -val invert_ntt_at_layer_2_ (simd_unit: u8) (zeta: i32) - : Prims.Pure u8 Prims.l_True (fun _ -> Prims.l_True) - val ntt_at_layer_0___round (re: t_Array u8 (sz 32)) (index: usize) diff --git a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Avx2.fsti b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Avx2.fsti index 46926e5bb..8ff985c8c 100644 --- a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Avx2.fsti +++ b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Avx2.fsti @@ -101,28 +101,6 @@ Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit = rhs.Libcrux_ml_dsa.Simd.Avx2.Vector_type.f_coefficients <: u8)); - f_montgomery_multiply_by_constant_pre - = - (fun (simd_unit: Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit) (constant: i32) -> true); - f_montgomery_multiply_by_constant_post - = - (fun - (simd_unit: Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit) - (constant: i32) - (out: Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit) - -> - true); - f_montgomery_multiply_by_constant - = - (fun (simd_unit: Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit) (constant: i32) -> - Core.Convert.f_into #u8 - #Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit - #FStar.Tactics.Typeclasses.solve - (Libcrux_ml_dsa.Simd.Avx2.Arithmetic.montgomery_multiply_by_constant simd_unit - .Libcrux_ml_dsa.Simd.Avx2.Vector_type.f_coefficients - constant - <: - u8)); f_montgomery_multiply_pre = (fun @@ -537,100 +515,40 @@ Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit = x <: Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit)); - f_invert_ntt_at_layer_0_pre - = - (fun - (simd_unit: Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit) - (zeta0: i32) - (zeta1: i32) - (zeta2: i32) - (zeta3: i32) - -> - true); - f_invert_ntt_at_layer_0_post - = - (fun - (simd_unit: Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit) - (zeta0: i32) - (zeta1: i32) - (zeta2: i32) - (zeta3: i32) - (out: Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit) - -> - true); - f_invert_ntt_at_layer_0_ - = - (fun - (simd_unit: Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit) - (zeta0: i32) - (zeta1: i32) - (zeta2: i32) - (zeta3: i32) - -> - Core.Convert.f_into #u8 - #Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit - #FStar.Tactics.Typeclasses.solve - (Libcrux_ml_dsa.Simd.Avx2.Ntt.invert_ntt_at_layer_0_ simd_unit - .Libcrux_ml_dsa.Simd.Avx2.Vector_type.f_coefficients - zeta0 - zeta1 - zeta2 - zeta3 - <: - u8)); - f_invert_ntt_at_layer_1_pre + f_invert_ntt_montgomery_pre = - (fun - (simd_unit: Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit) - (zeta0: i32) - (zeta1: i32) - -> - true); - f_invert_ntt_at_layer_1_post + (fun (simd_units: t_Array Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit (sz 32)) -> true); + f_invert_ntt_montgomery_post = (fun - (simd_unit: Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit) - (zeta0: i32) - (zeta1: i32) - (out: Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit) + (simd_units: t_Array Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit (sz 32)) + (out: t_Array Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit (sz 32)) -> true); - f_invert_ntt_at_layer_1_ + f_invert_ntt_montgomery = - (fun - (simd_unit: Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit) - (zeta0: i32) - (zeta1: i32) - -> - Core.Convert.f_into #u8 - #Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit - #FStar.Tactics.Typeclasses.solve - (Libcrux_ml_dsa.Simd.Avx2.Ntt.invert_ntt_at_layer_1_ simd_unit - .Libcrux_ml_dsa.Simd.Avx2.Vector_type.f_coefficients - zeta0 - zeta1 + fun (simd_units: t_Array Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit (sz 32)) -> + let result:t_Array u8 (sz 32) = + Libcrux_ml_dsa.Simd.Avx2.Invntt.invert_ntt_montgomery (Core.Array.impl_23__map #Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit + (sz 32) + #u8 + simd_units + (fun x -> + let x:Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit = x in + x.Libcrux_ml_dsa.Simd.Avx2.Vector_type.f_coefficients) <: - u8)); - f_invert_ntt_at_layer_2_pre - = - (fun (simd_unit: Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit) (zeta: i32) -> true); - f_invert_ntt_at_layer_2_post - = - (fun - (simd_unit: Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit) - (zeta: i32) - (out: Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit) - -> - true); - f_invert_ntt_at_layer_2_ - = - fun (simd_unit: Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit) (zeta: i32) -> - Core.Convert.f_into #u8 + t_Array u8 (sz 32)) + in + Core.Array.impl_23__map #u8 + (sz 32) #Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit - #FStar.Tactics.Typeclasses.solve - (Libcrux_ml_dsa.Simd.Avx2.Ntt.invert_ntt_at_layer_2_ simd_unit - .Libcrux_ml_dsa.Simd.Avx2.Vector_type.f_coefficients - zeta - <: - u8) + result + (fun x -> + let x:u8 = x in + Core.Convert.f_into #u8 + #Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit + #FStar.Tactics.Typeclasses.solve + x + <: + Libcrux_ml_dsa.Simd.Avx2.Vector_type.t_AVX2SIMDUnit) } diff --git a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Portable.Ntt.fst b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Portable.Ntt.fst index 5cddf2bbf..6e1832690 100644 --- a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Portable.Ntt.fst +++ b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Portable.Ntt.fst @@ -3,440 +3,6 @@ module Libcrux_ml_dsa.Simd.Portable.Ntt open Core open FStar.Mul -let invert_ntt_at_layer_0_ - (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) - (zeta0 zeta1 zeta2 zeta3: i32) - = - let a_minus_b:i32 = - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 1 ] <: i32) -! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 0 ] <: i32) - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 0) - ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 0 ] <: i32) +! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 1 ] <: i32) - <: - i32) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 1) - (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta0 - <: - i32) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let a_minus_b:i32 = - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 3 ] <: i32) -! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 2 ] <: i32) - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 2) - ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 2 ] <: i32) +! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 3 ] <: i32) - <: - i32) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 3) - (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta1 - <: - i32) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let a_minus_b:i32 = - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 5 ] <: i32) -! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 4 ] <: i32) - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 4) - ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 4 ] <: i32) +! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 5 ] <: i32) - <: - i32) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 5) - (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta2 - <: - i32) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let a_minus_b:i32 = - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 7 ] <: i32) -! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 6 ] <: i32) - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 6) - ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 6 ] <: i32) +! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 7 ] <: i32) - <: - i32) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 7) - (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta3 - <: - i32) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - simd_unit - -let invert_ntt_at_layer_1_ - (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) - (zeta0 zeta1: i32) - = - let a_minus_b:i32 = - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 2 ] <: i32) -! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 0 ] <: i32) - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 0) - ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 0 ] <: i32) +! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 2 ] <: i32) - <: - i32) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 2) - (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta0 - <: - i32) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let a_minus_b:i32 = - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 3 ] <: i32) -! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 1 ] <: i32) - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 1) - ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 1 ] <: i32) +! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 3 ] <: i32) - <: - i32) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 3) - (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta0 - <: - i32) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let a_minus_b:i32 = - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 6 ] <: i32) -! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 4 ] <: i32) - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 4) - ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 4 ] <: i32) +! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 6 ] <: i32) - <: - i32) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 6) - (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta1 - <: - i32) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let a_minus_b:i32 = - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 7 ] <: i32) -! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 5 ] <: i32) - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 5) - ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 5 ] <: i32) +! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 7 ] <: i32) - <: - i32) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 7) - (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta1 - <: - i32) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - simd_unit - -let invert_ntt_at_layer_2_ - (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) - (zeta: i32) - = - let a_minus_b:i32 = - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 4 ] <: i32) -! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 0 ] <: i32) - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 0) - ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 0 ] <: i32) +! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 4 ] <: i32) - <: - i32) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 4) - (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta <: i32 - ) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let a_minus_b:i32 = - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 5 ] <: i32) -! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 1 ] <: i32) - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 1) - ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 1 ] <: i32) +! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 5 ] <: i32) - <: - i32) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 5) - (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta <: i32 - ) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let a_minus_b:i32 = - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 6 ] <: i32) -! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 2 ] <: i32) - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 2) - ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 2 ] <: i32) +! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 6 ] <: i32) - <: - i32) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 6) - (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta <: i32 - ) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let a_minus_b:i32 = - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 7 ] <: i32) -! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 3 ] <: i32) - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 3) - ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 3 ] <: i32) +! - (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 7 ] <: i32) - <: - i32) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = - { - simd_unit with - Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit - .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients - (sz 7) - (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta <: i32 - ) - } - <: - Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - in - simd_unit - let simd_unit_ntt_at_layer_0_ (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) (zeta0 zeta1 zeta2 zeta3: i32) diff --git a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Portable.Ntt.fsti b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Portable.Ntt.fsti index ae1f422e4..08682c48d 100644 --- a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Portable.Ntt.fsti +++ b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Portable.Ntt.fsti @@ -23,27 +23,6 @@ let ntt_at_layer_7___STEP: usize = sz 128 let ntt_at_layer_7___STEP_BY: usize = sz 16 -val invert_ntt_at_layer_0_ - (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) - (zeta0 zeta1 zeta2 zeta3: i32) - : Prims.Pure Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - Prims.l_True - (fun _ -> Prims.l_True) - -val invert_ntt_at_layer_1_ - (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) - (zeta0 zeta1: i32) - : Prims.Pure Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - Prims.l_True - (fun _ -> Prims.l_True) - -val invert_ntt_at_layer_2_ - (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) - (zeta: i32) - : Prims.Pure Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit - Prims.l_True - (fun _ -> Prims.l_True) - val simd_unit_ntt_at_layer_0_ (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) (zeta0 zeta1 zeta2 zeta3: i32) diff --git a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Portable.fsti b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Portable.fsti index 4b05f75c3..b5c72724c 100644 --- a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Portable.fsti +++ b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Portable.fsti @@ -87,21 +87,6 @@ Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = (rhs: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) -> Libcrux_ml_dsa.Simd.Portable.Arithmetic.subtract lhs rhs); - f_montgomery_multiply_by_constant_pre - = - (fun (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) (c: i32) -> true); - f_montgomery_multiply_by_constant_post - = - (fun - (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) - (c: i32) - (out: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) - -> - true); - f_montgomery_multiply_by_constant - = - (fun (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) (c: i32) -> - Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_by_constant simd_unit c); f_montgomery_multiply_pre = (fun @@ -457,76 +442,21 @@ Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = (simd_units: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) -> Libcrux_ml_dsa.Simd.Portable.Ntt.ntt simd_units); - f_invert_ntt_at_layer_0_pre + f_invert_ntt_montgomery_pre = (fun - (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) - (zeta0: i32) - (zeta1: i32) - (zeta2: i32) - (zeta3: i32) - -> - true); - f_invert_ntt_at_layer_0_post - = - (fun - (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) - (zeta0: i32) - (zeta1: i32) - (zeta2: i32) - (zeta3: i32) - (out: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) - -> - true); - f_invert_ntt_at_layer_0_ - = - (fun - (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) - (zeta0: i32) - (zeta1: i32) - (zeta2: i32) - (zeta3: i32) - -> - Libcrux_ml_dsa.Simd.Portable.Ntt.invert_ntt_at_layer_0_ simd_unit zeta0 zeta1 zeta2 zeta3); - f_invert_ntt_at_layer_1_pre - = - (fun - (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) - (zeta0: i32) - (zeta1: i32) - -> - true); - f_invert_ntt_at_layer_1_post - = - (fun - (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) - (zeta0: i32) - (zeta1: i32) - (out: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) - -> - true); - f_invert_ntt_at_layer_1_ - = - (fun - (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) - (zeta0: i32) - (zeta1: i32) + (simd_units: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) -> - Libcrux_ml_dsa.Simd.Portable.Ntt.invert_ntt_at_layer_1_ simd_unit zeta0 zeta1); - f_invert_ntt_at_layer_2_pre - = - (fun (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) (zeta: i32) -> true); - f_invert_ntt_at_layer_2_post + f_invert_ntt_montgomery_post = (fun - (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) - (zeta: i32) - (out: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + (simd_units: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + (out: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) -> true); - f_invert_ntt_at_layer_2_ + f_invert_ntt_montgomery = - fun (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) (zeta: i32) -> - Libcrux_ml_dsa.Simd.Portable.Ntt.invert_ntt_at_layer_2_ simd_unit zeta + fun (simd_units: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) -> + Libcrux_ml_dsa.Simd.Portable.Invntt.invert_ntt_montgomery simd_units } diff --git a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Traits.fsti b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Traits.fsti index 543e2b390..280e421e6 100644 --- a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Traits.fsti +++ b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Traits.fsti @@ -59,12 +59,6 @@ class t_Operations (v_Self: Type0) = { -> Prims.Pure v_Self (f_montgomery_multiply_pre x0 x1) (fun result -> f_montgomery_multiply_post x0 x1 result); - f_montgomery_multiply_by_constant_pre:v_Self -> i32 -> Type0; - f_montgomery_multiply_by_constant_post:v_Self -> i32 -> v_Self -> Type0; - f_montgomery_multiply_by_constant:x0: v_Self -> x1: i32 - -> Prims.Pure v_Self - (f_montgomery_multiply_by_constant_pre x0 x1) - (fun result -> f_montgomery_multiply_by_constant_post x0 x1 result); f_shift_left_then_reduce_pre:v_SHIFT_BY: i32 -> v_Self -> Type0; f_shift_left_then_reduce_post:v_SHIFT_BY: i32 -> v_Self -> v_Self -> Type0; f_shift_left_then_reduce:v_SHIFT_BY: i32 -> x0: v_Self @@ -152,24 +146,12 @@ class t_Operations (v_Self: Type0) = { f_ntt_post:t_Array v_Self (sz 32) -> t_Array v_Self (sz 32) -> Type0; f_ntt:x0: t_Array v_Self (sz 32) -> Prims.Pure (t_Array v_Self (sz 32)) (f_ntt_pre x0) (fun result -> f_ntt_post x0 result); - f_invert_ntt_at_layer_0_pre:v_Self -> i32 -> i32 -> i32 -> i32 -> Type0; - f_invert_ntt_at_layer_0_post:v_Self -> i32 -> i32 -> i32 -> i32 -> v_Self -> Type0; - f_invert_ntt_at_layer_0_:x0: v_Self -> x1: i32 -> x2: i32 -> x3: i32 -> x4: i32 - -> Prims.Pure v_Self - (f_invert_ntt_at_layer_0_pre x0 x1 x2 x3 x4) - (fun result -> f_invert_ntt_at_layer_0_post x0 x1 x2 x3 x4 result); - f_invert_ntt_at_layer_1_pre:v_Self -> i32 -> i32 -> Type0; - f_invert_ntt_at_layer_1_post:v_Self -> i32 -> i32 -> v_Self -> Type0; - f_invert_ntt_at_layer_1_:x0: v_Self -> x1: i32 -> x2: i32 - -> Prims.Pure v_Self - (f_invert_ntt_at_layer_1_pre x0 x1 x2) - (fun result -> f_invert_ntt_at_layer_1_post x0 x1 x2 result); - f_invert_ntt_at_layer_2_pre:v_Self -> i32 -> Type0; - f_invert_ntt_at_layer_2_post:v_Self -> i32 -> v_Self -> Type0; - f_invert_ntt_at_layer_2_:x0: v_Self -> x1: i32 - -> Prims.Pure v_Self - (f_invert_ntt_at_layer_2_pre x0 x1) - (fun result -> f_invert_ntt_at_layer_2_post x0 x1 result) + f_invert_ntt_montgomery_pre:t_Array v_Self (sz 32) -> Type0; + f_invert_ntt_montgomery_post:t_Array v_Self (sz 32) -> t_Array v_Self (sz 32) -> Type0; + f_invert_ntt_montgomery:x0: t_Array v_Self (sz 32) + -> Prims.Pure (t_Array v_Self (sz 32)) + (f_invert_ntt_montgomery_pre x0) + (fun result -> f_invert_ntt_montgomery_post x0 result) } let v_COEFFICIENTS_IN_SIMD_UNIT: usize = sz 8 @@ -180,6 +162,3 @@ let v_INVERSE_OF_MODULUS_MOD_MONTGOMERY_R: u64 = 58728449uL let v_SIMD_UNITS_IN_RING_ELEMENT: usize = Libcrux_ml_dsa.Constants.v_COEFFICIENTS_IN_RING_ELEMENT /! v_COEFFICIENTS_IN_SIMD_UNIT - -val montgomery_multiply_by_fer (#v_S: Type0) {| i1: t_Operations v_S |} (simd_unit: v_S) (fer: i32) - : Prims.Pure v_S Prims.l_True (fun _ -> Prims.l_True) From 3daab9b7a0f44003e3e9b9ae66a8440b3f7db734 Mon Sep 17 00:00:00 2001 From: Jonas Schneider-Bensch Date: Thu, 14 Nov 2024 13:10:16 +0100 Subject: [PATCH 4/5] Omitted FStar --- .../Libcrux_ml_dsa.Simd.Avx2.Invntt.fst | 375 ++++++ .../Libcrux_ml_dsa.Simd.Avx2.Invntt.fsti | 82 ++ .../Libcrux_ml_dsa.Simd.Portable.Invntt.fst | 1015 +++++++++++++++++ .../Libcrux_ml_dsa.Simd.Portable.Invntt.fsti | 131 +++ 4 files changed, 1603 insertions(+) create mode 100644 libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Avx2.Invntt.fst create mode 100644 libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Avx2.Invntt.fsti create mode 100644 libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Portable.Invntt.fst create mode 100644 libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Portable.Invntt.fsti diff --git a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Avx2.Invntt.fst b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Avx2.Invntt.fst new file mode 100644 index 000000000..53d285487 --- /dev/null +++ b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Avx2.Invntt.fst @@ -0,0 +1,375 @@ +module Libcrux_ml_dsa.Simd.Avx2.Invntt +#set-options "--fuel 0 --ifuel 1 --z3rlimit 100" +open Core +open FStar.Mul + +let simd_unit_invert_ntt_at_layer_0_ + (simd_unit0 simd_unit1: u8) + (zeta00 zeta01 zeta02 zeta03 zeta10 zeta11 zeta12 zeta13: i32) + = + let a_shuffled:u8 = Libcrux_intrinsics.Avx2_extract.mm256_shuffle_epi32 216l simd_unit0 in + let b_shuffled:u8 = Libcrux_intrinsics.Avx2_extract.mm256_shuffle_epi32 216l simd_unit1 in + let lo_values:u8 = Libcrux_intrinsics.Avx2_extract.mm256_unpacklo_epi64 a_shuffled b_shuffled in + let hi_values:u8 = Libcrux_intrinsics.Avx2_extract.mm256_unpackhi_epi64 a_shuffled b_shuffled in + let sums:u8 = Libcrux_ml_dsa.Simd.Avx2.Arithmetic.add lo_values hi_values in + let differences:u8 = Libcrux_ml_dsa.Simd.Avx2.Arithmetic.subtract hi_values lo_values in + let zetas:u8 = + Libcrux_intrinsics.Avx2_extract.mm256_set_epi32 zeta13 + zeta12 + zeta03 + zeta02 + zeta11 + zeta10 + zeta01 + zeta00 + in + let products:u8 = Libcrux_ml_dsa.Simd.Avx2.Arithmetic.montgomery_multiply differences zetas in + let a_shuffled:u8 = Libcrux_intrinsics.Avx2_extract.mm256_unpacklo_epi64 sums products in + let b_shuffled:u8 = Libcrux_intrinsics.Avx2_extract.mm256_unpackhi_epi64 sums products in + let a:u8 = Libcrux_intrinsics.Avx2_extract.mm256_shuffle_epi32 216l a_shuffled in + let b:u8 = Libcrux_intrinsics.Avx2_extract.mm256_shuffle_epi32 216l b_shuffled in + a, b <: (u8 & u8) + +let invert_ntt_at_layer_0___round + (re: t_Array u8 (sz 32)) + (index: usize) + (zeta00 zeta01 zeta02 zeta03 zeta10 zeta11 zeta12 zeta13: i32) + = + let lhs, lhs_1_:(u8 & u8) = + simd_unit_invert_ntt_at_layer_0_ (re.[ index ] <: u8) (re.[ index +! sz 1 <: usize ] <: u8) + zeta00 zeta01 zeta02 zeta03 zeta10 zeta11 zeta12 zeta13 + in + let re:t_Array u8 (sz 32) = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re index lhs + in + let re:t_Array u8 (sz 32) = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re (index +! sz 1 <: usize) lhs_1_ + in + let _:Prims.unit = () in + re + +let invert_ntt_at_layer_0_ (re: t_Array u8 (sz 32)) = + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_0___round re (sz 0) 1976782l (-846154l) 1400424l 3937738l (-1362209l) + (-48306l) 3919660l (-554416l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_0___round re (sz 2) (-3545687l) 1612842l (-976891l) 183443l (-2286327l) + (-420899l) (-2235985l) (-2939036l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_0___round re (sz 4) (-3833893l) (-260646l) (-1104333l) (-1667432l) 1910376l + (-1803090l) 1723600l (-426683l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_0___round re (sz 6) 472078l 1717735l (-975884l) 2213111l 269760l 3866901l + 3523897l (-3038916l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_0___round re (sz 8) (-1799107l) (-3694233l) 1652634l 810149l 3014001l + 1616392l 162844l (-3183426l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_0___round re (sz 10) (-1207385l) 185531l 3369112l 1957272l (-164721l) + 2454455l 2432395l (-2013608l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_0___round re (sz 12) (-3776993l) 594136l (-3724270l) (-2584293l) (-1846953l) + (-1671176l) (-2831860l) (-542412l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_0___round re (sz 14) 3406031l 2235880l 777191l 1500165l (-1374803l) + (-2546312l) 1917081l (-1279661l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_0___round re (sz 16) (-1962642l) 3306115l 1312455l (-451100l) (-1430225l) + (-3318210l) 1237275l (-1333058l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_0___round re (sz 18) (-1050970l) 1903435l 1869119l (-2994039l) (-3548272l) + 2635921l 1250494l (-3767016l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_0___round re (sz 20) 1595974l 2486353l 1247620l 4055324l 1265009l + (-2590150l) 2691481l 2842341l + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_0___round re (sz 22) 203044l 1735879l (-3342277l) 3437287l 4108315l + (-2437823l) 286988l 342297l + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_0___round re (sz 24) (-3595838l) (-768622l) (-525098l) (-3556995l) 3207046l + 2031748l (-3122442l) (-655327l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_0___round re (sz 26) (-522500l) (-43260l) (-1613174l) 495491l 819034l + 909542l 1859098l 900702l + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_0___round re (sz 28) (-3193378l) (-1197226l) (-3759364l) (-3520352l) + 3513181l (-1235728l) 2434439l 266997l + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_0___round re (sz 30) (-3562462l) (-2446433l) 2244091l (-3342478l) 3817976l + 2316500l 3407706l 2091667l + in + re + +let simd_unit_invert_ntt_at_layer_1_ (simd_unit0 simd_unit1: u8) (zeta00 zeta01 zeta10 zeta11: i32) = + let lo_values:u8 = Libcrux_intrinsics.Avx2_extract.mm256_unpacklo_epi64 simd_unit0 simd_unit1 in + let hi_values:u8 = Libcrux_intrinsics.Avx2_extract.mm256_unpackhi_epi64 simd_unit0 simd_unit1 in + let sums:u8 = Libcrux_ml_dsa.Simd.Avx2.Arithmetic.add lo_values hi_values in + let differences:u8 = Libcrux_ml_dsa.Simd.Avx2.Arithmetic.subtract hi_values lo_values in + let zetas:u8 = + Libcrux_intrinsics.Avx2_extract.mm256_set_epi32 zeta11 + zeta11 + zeta01 + zeta01 + zeta10 + zeta10 + zeta00 + zeta00 + in + let products:u8 = Libcrux_ml_dsa.Simd.Avx2.Arithmetic.montgomery_multiply differences zetas in + let a:u8 = Libcrux_intrinsics.Avx2_extract.mm256_unpacklo_epi64 sums products in + let b:u8 = Libcrux_intrinsics.Avx2_extract.mm256_unpackhi_epi64 sums products in + a, b <: (u8 & u8) + +let invert_ntt_at_layer_1___round + (re: t_Array u8 (sz 32)) + (index: usize) + (zeta_00_ zeta_01_ zeta_10_ zeta_11_: i32) + = + let lhs, lhs_1_:(u8 & u8) = + simd_unit_invert_ntt_at_layer_1_ (re.[ index ] <: u8) + (re.[ index +! sz 1 <: usize ] <: u8) + zeta_00_ + zeta_01_ + zeta_10_ + zeta_11_ + in + let re:t_Array u8 (sz 32) = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re index lhs + in + let re:t_Array u8 (sz 32) = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re (index +! sz 1 <: usize) lhs_1_ + in + let _:Prims.unit = () in + re + +let invert_ntt_at_layer_1_ (re: t_Array u8 (sz 32)) = + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_1___round re (sz 0) 3839961l (-3628969l) (-3881060l) (-3019102l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_1___round re (sz 2) (-1439742l) (-812732l) (-1584928l) 1285669l + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_1___round re (sz 4) 1341330l 1315589l (-177440l) (-2409325l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_1___round re (sz 6) (-1851402l) 3159746l (-3553272l) 189548l + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_1___round re (sz 8) (-1316856l) 759969l (-210977l) 2389356l + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_1___round re (sz 10) (-3249728l) 1653064l (-8578l) (-3724342l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_1___round re (sz 12) 3958618l 904516l (-1100098l) 44288l + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_1___round re (sz 14) 3097992l 508951l 264944l (-3343383l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_1___round re (sz 16) (-1430430l) 1852771l 1349076l (-381987l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_1___round re (sz 18) (-1308169l) (-22981l) (-1228525l) (-671102l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_1___round re (sz 20) (-2477047l) (-411027l) (-3693493l) (-2967645l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_1___round re (sz 22) 2715295l 2147896l (-983419l) 3412210l + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_1___round re (sz 24) 126922l (-3632928l) (-3157330l) (-3190144l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_1___round re (sz 26) (-1000202l) (-4083598l) 1939314l (-1257611l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_1___round re (sz 28) (-1585221l) 2176455l 3475950l (-1452451l) + in + let re:t_Array u8 (sz 32) = + invert_ntt_at_layer_1___round re (sz 30) (-3041255l) (-3677745l) (-1528703l) (-3930395l) + in + re + +let simd_unit_invert_ntt_at_layer_2_ (simd_unit0 simd_unit1: u8) (zeta0 zeta1: i32) = + let lo_values:u8 = + Libcrux_intrinsics.Avx2_extract.mm256_permute2x128_si256 32l simd_unit0 simd_unit1 + in + let hi_values:u8 = + Libcrux_intrinsics.Avx2_extract.mm256_permute2x128_si256 49l simd_unit0 simd_unit1 + in + let sums:u8 = Libcrux_ml_dsa.Simd.Avx2.Arithmetic.add lo_values hi_values in + let differences:u8 = Libcrux_ml_dsa.Simd.Avx2.Arithmetic.subtract hi_values lo_values in + let zetas:u8 = + Libcrux_intrinsics.Avx2_extract.mm256_set_epi32 zeta1 zeta1 zeta1 zeta1 zeta0 zeta0 zeta0 zeta0 + in + let products:u8 = Libcrux_ml_dsa.Simd.Avx2.Arithmetic.montgomery_multiply differences zetas in + let a:u8 = Libcrux_intrinsics.Avx2_extract.mm256_permute2x128_si256 32l sums products in + let b:u8 = Libcrux_intrinsics.Avx2_extract.mm256_permute2x128_si256 49l sums products in + a, b <: (u8 & u8) + +let invert_ntt_at_layer_2___round (re: t_Array u8 (sz 32)) (index: usize) (zeta1 zeta2: i32) = + let lhs, lhs_1_:(u8 & u8) = + simd_unit_invert_ntt_at_layer_2_ (re.[ index ] <: u8) + (re.[ index +! sz 1 <: usize ] <: u8) + zeta1 + zeta2 + in + let re:t_Array u8 (sz 32) = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re index lhs + in + let re:t_Array u8 (sz 32) = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re (index +! sz 1 <: usize) lhs_1_ + in + let _:Prims.unit = () in + re + +let invert_ntt_at_layer_2_ (re: t_Array u8 (sz 32)) = + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_2___round re (sz 0) (-2797779l) 2071892l in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_2___round re (sz 2) (-2556880l) 3900724l in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_2___round re (sz 4) 3881043l 954230l in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_2___round re (sz 6) 531354l 811944l in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_2___round re (sz 8) 3699596l (-1600420l) in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_2___round re (sz 10) (-2140649l) 3507263l in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_2___round re (sz 12) (-3821735l) 3505694l in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_2___round re (sz 14) (-1643818l) (-1699267l) in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_2___round re (sz 16) (-539299l) 2348700l in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_2___round re (sz 18) (-300467l) 3539968l in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_2___round re (sz 20) (-2867647l) 3574422l in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_2___round re (sz 22) (-3043716l) (-3861115l) in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_2___round re (sz 24) 3915439l (-2537516l) in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_2___round re (sz 26) (-3592148l) (-1661693l) in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_2___round re (sz 28) 3530437l 3077325l in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_2___round re (sz 30) 95776l 2706023l in + re + +let outer_3_plus (v_OFFSET v_STEP_BY: usize) (v_ZETA: i32) (re: t_Array u8 (sz 32)) = + let re:t_Array u8 (sz 32) = + Rust_primitives.Hax.Folds.fold_range v_OFFSET + (v_OFFSET +! v_STEP_BY <: usize) + (fun re temp_1_ -> + let re:t_Array u8 (sz 32) = re in + let _:usize = temp_1_ in + true) + re + (fun re j -> + let re:t_Array u8 (sz 32) = re in + let j:usize = j in + let a_minus_b:u8 = + Libcrux_ml_dsa.Simd.Avx2.Arithmetic.subtract (re.[ j +! v_STEP_BY <: usize ] <: u8) + (re.[ j ] <: u8) + in + let re:t_Array u8 (sz 32) = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re + j + (Libcrux_ml_dsa.Simd.Avx2.Arithmetic.add (re.[ j ] <: u8) + (re.[ j +! v_STEP_BY <: usize ] <: u8) + <: + u8) + in + let re:t_Array u8 (sz 32) = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re + (j +! v_STEP_BY <: usize) + (Libcrux_ml_dsa.Simd.Avx2.Arithmetic.montgomery_multiply_by_constant a_minus_b v_ZETA + <: + u8) + in + re) + in + let hax_temp_output:Prims.unit = () <: Prims.unit in + re + +let invert_ntt_at_layer_3_ (re: t_Array u8 (sz 32)) = + let re:t_Array u8 (sz 32) = outer_3_plus (sz 0) (sz 1) 280005l re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 2) (sz 1) 4010497l re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 4) (sz 1) (-19422l) re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 6) (sz 1) 1757237l re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 8) (sz 1) (-3277672l) re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 10) (sz 1) (-1399561l) re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 12) (sz 1) (-3859737l) re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 14) (sz 1) (-2118186l) re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 16) (sz 1) (-2108549l) re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 18) (sz 1) 2619752l re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 20) (sz 1) (-1119584l) re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 22) (sz 1) (-549488l) re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 24) (sz 1) 3585928l re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 26) (sz 1) (-1079900l) re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 28) (sz 1) 1024112l re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 30) (sz 1) 2725464l re in + re + +let invert_ntt_at_layer_4_ (re: t_Array u8 (sz 32)) = + let re:t_Array u8 (sz 32) = outer_3_plus (sz 0) (sz 2) 2680103l re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 4) (sz 2) 3111497l re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 8) (sz 2) (-2884855l) re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 12) (sz 2) 3119733l re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 16) (sz 2) (-2091905l) re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 20) (sz 2) (-359251l) re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 24) (sz 2) 2353451l re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 28) (sz 2) 1826347l re in + re + +let invert_ntt_at_layer_5_ (re: t_Array u8 (sz 32)) = + let re:t_Array u8 (sz 32) = outer_3_plus (sz 0) (sz 4) 466468l re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 8) (sz 4) (-876248l) re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 16) (sz 4) (-777960l) re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 24) (sz 4) 237124l re in + re + +let invert_ntt_at_layer_6_ (re: t_Array u8 (sz 32)) = + let re:t_Array u8 (sz 32) = outer_3_plus (sz 0) (sz 8) (-518909l) re in + let re:t_Array u8 (sz 32) = outer_3_plus (sz 16) (sz 8) (-2608894l) re in + re + +let invert_ntt_at_layer_7_ (re: t_Array u8 (sz 32)) = + let re:t_Array u8 (sz 32) = outer_3_plus (sz 0) (sz 16) 25847l re in + re + +let invert_ntt_montgomery (re: t_Array u8 (sz 32)) = + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_0_ re in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_1_ re in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_2_ re in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_3_ re in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_4_ re in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_5_ re in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_6_ re in + let re:t_Array u8 (sz 32) = invert_ntt_at_layer_7_ re in + let _:Prims.unit = () in + let re:t_Array u8 (sz 32) = + Rust_primitives.Hax.Folds.fold_range (sz 0) + (Core.Slice.impl__len #u8 (re <: t_Slice u8) <: usize) + (fun re temp_1_ -> + let re:t_Array u8 (sz 32) = re in + let _:usize = temp_1_ in + true) + re + (fun re i -> + let re:t_Array u8 (sz 32) = re in + let i:usize = i in + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re + i + (Libcrux_ml_dsa.Simd.Avx2.Arithmetic.montgomery_multiply_by_constant (re.[ i ] <: u8) + 41978l + <: + u8) + <: + t_Array u8 (sz 32)) + in + re diff --git a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Avx2.Invntt.fsti b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Avx2.Invntt.fsti new file mode 100644 index 000000000..9422d42f8 --- /dev/null +++ b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Avx2.Invntt.fsti @@ -0,0 +1,82 @@ +module Libcrux_ml_dsa.Simd.Avx2.Invntt +#set-options "--fuel 0 --ifuel 1 --z3rlimit 100" +open Core +open FStar.Mul + +let invert_ntt_at_layer_3___STEP: usize = sz 8 + +let invert_ntt_at_layer_3___STEP_BY: usize = sz 1 + +let invert_ntt_at_layer_4___STEP: usize = sz 16 + +let invert_ntt_at_layer_4___STEP_BY: usize = sz 2 + +let invert_ntt_at_layer_5___STEP: usize = sz 32 + +let invert_ntt_at_layer_5___STEP_BY: usize = sz 4 + +let invert_ntt_at_layer_6___STEP: usize = sz 64 + +let invert_ntt_at_layer_6___STEP_BY: usize = sz 8 + +let invert_ntt_at_layer_7___STEP: usize = sz 128 + +let invert_ntt_at_layer_7___STEP_BY: usize = sz 16 + +let simd_unit_invert_ntt_at_layer_0___SHUFFLE: i32 = 216l + +val simd_unit_invert_ntt_at_layer_0_ + (simd_unit0 simd_unit1: u8) + (zeta00 zeta01 zeta02 zeta03 zeta10 zeta11 zeta12 zeta13: i32) + : Prims.Pure (u8 & u8) Prims.l_True (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_0___round + (re: t_Array u8 (sz 32)) + (index: usize) + (zeta00 zeta01 zeta02 zeta03 zeta10 zeta11 zeta12 zeta13: i32) + : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_0_ (re: t_Array u8 (sz 32)) + : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) + +val simd_unit_invert_ntt_at_layer_1_ (simd_unit0 simd_unit1: u8) (zeta00 zeta01 zeta10 zeta11: i32) + : Prims.Pure (u8 & u8) Prims.l_True (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_1___round + (re: t_Array u8 (sz 32)) + (index: usize) + (zeta_00_ zeta_01_ zeta_10_ zeta_11_: i32) + : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_1_ (re: t_Array u8 (sz 32)) + : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) + +val simd_unit_invert_ntt_at_layer_2_ (simd_unit0 simd_unit1: u8) (zeta0 zeta1: i32) + : Prims.Pure (u8 & u8) Prims.l_True (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_2___round (re: t_Array u8 (sz 32)) (index: usize) (zeta1 zeta2: i32) + : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_2_ (re: t_Array u8 (sz 32)) + : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) + +val outer_3_plus (v_OFFSET v_STEP_BY: usize) (v_ZETA: i32) (re: t_Array u8 (sz 32)) + : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_3_ (re: t_Array u8 (sz 32)) + : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_4_ (re: t_Array u8 (sz 32)) + : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_5_ (re: t_Array u8 (sz 32)) + : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_6_ (re: t_Array u8 (sz 32)) + : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_7_ (re: t_Array u8 (sz 32)) + : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) + +val invert_ntt_montgomery (re: t_Array u8 (sz 32)) + : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) diff --git a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Portable.Invntt.fst b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Portable.Invntt.fst new file mode 100644 index 000000000..e4d06be44 --- /dev/null +++ b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Portable.Invntt.fst @@ -0,0 +1,1015 @@ +module Libcrux_ml_dsa.Simd.Portable.Invntt +#set-options "--fuel 0 --ifuel 1 --z3rlimit 100" +open Core +open FStar.Mul + +let simd_unit_invert_ntt_at_layer_0_ + (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + (zeta0 zeta1 zeta2 zeta3: i32) + = + let a_minus_b:i32 = + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 1 ] <: i32) -! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 0 ] <: i32) + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 0) + ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 0 ] <: i32) +! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 1 ] <: i32) + <: + i32) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 1) + (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta0 + <: + i32) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let a_minus_b:i32 = + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 3 ] <: i32) -! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 2 ] <: i32) + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 2) + ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 2 ] <: i32) +! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 3 ] <: i32) + <: + i32) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 3) + (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta1 + <: + i32) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let a_minus_b:i32 = + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 5 ] <: i32) -! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 4 ] <: i32) + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 4) + ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 4 ] <: i32) +! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 5 ] <: i32) + <: + i32) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 5) + (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta2 + <: + i32) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let a_minus_b:i32 = + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 7 ] <: i32) -! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 6 ] <: i32) + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 6) + ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 6 ] <: i32) +! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 7 ] <: i32) + <: + i32) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 7) + (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta3 + <: + i32) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + simd_unit + +let invert_ntt_at_layer_0___round + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + (index: usize) + (zeta0 zeta1 zeta2 zeta3: i32) + = + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re + index + (simd_unit_invert_ntt_at_layer_0_ (re.[ index ] + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + zeta0 + zeta1 + zeta2 + zeta3 + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + in + re + +let invert_ntt_at_layer_0_ + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + = + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 0) 1976782l (-846154l) 1400424l 3937738l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 1) (-1362209l) (-48306l) 3919660l (-554416l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 2) (-3545687l) 1612842l (-976891l) 183443l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 3) (-2286327l) (-420899l) (-2235985l) (-2939036l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 4) (-3833893l) (-260646l) (-1104333l) (-1667432l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 5) 1910376l (-1803090l) 1723600l (-426683l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 6) 472078l 1717735l (-975884l) 2213111l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 7) 269760l 3866901l 3523897l (-3038916l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 8) (-1799107l) (-3694233l) 1652634l 810149l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 9) 3014001l 1616392l 162844l (-3183426l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 10) (-1207385l) 185531l 3369112l 1957272l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 11) (-164721l) 2454455l 2432395l (-2013608l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 12) (-3776993l) 594136l (-3724270l) (-2584293l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 13) (-1846953l) (-1671176l) (-2831860l) (-542412l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 14) 3406031l 2235880l 777191l 1500165l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 15) (-1374803l) (-2546312l) 1917081l (-1279661l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 16) (-1962642l) 3306115l 1312455l (-451100l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 17) (-1430225l) (-3318210l) 1237275l (-1333058l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 18) (-1050970l) 1903435l 1869119l (-2994039l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 19) (-3548272l) 2635921l 1250494l (-3767016l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 20) 1595974l 2486353l 1247620l 4055324l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 21) 1265009l (-2590150l) 2691481l 2842341l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 22) 203044l 1735879l (-3342277l) 3437287l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 23) 4108315l (-2437823l) 286988l 342297l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 24) (-3595838l) (-768622l) (-525098l) (-3556995l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 25) 3207046l 2031748l (-3122442l) (-655327l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 26) (-522500l) (-43260l) (-1613174l) 495491l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 27) 819034l 909542l 1859098l 900702l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 28) (-3193378l) (-1197226l) (-3759364l) (-3520352l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 29) 3513181l (-1235728l) 2434439l 266997l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 30) (-3562462l) (-2446433l) 2244091l (-3342478l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0___round re (sz 31) 3817976l 2316500l 3407706l 2091667l + in + re + +let simd_unit_invert_ntt_at_layer_1_ + (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + (zeta0 zeta1: i32) + = + let a_minus_b:i32 = + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 2 ] <: i32) -! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 0 ] <: i32) + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 0) + ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 0 ] <: i32) +! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 2 ] <: i32) + <: + i32) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 2) + (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta0 + <: + i32) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let a_minus_b:i32 = + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 3 ] <: i32) -! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 1 ] <: i32) + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 1) + ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 1 ] <: i32) +! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 3 ] <: i32) + <: + i32) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 3) + (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta0 + <: + i32) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let a_minus_b:i32 = + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 6 ] <: i32) -! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 4 ] <: i32) + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 4) + ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 4 ] <: i32) +! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 6 ] <: i32) + <: + i32) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 6) + (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta1 + <: + i32) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let a_minus_b:i32 = + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 7 ] <: i32) -! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 5 ] <: i32) + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 5) + ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 5 ] <: i32) +! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 7 ] <: i32) + <: + i32) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 7) + (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta1 + <: + i32) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + simd_unit + +let invert_ntt_at_layer_1___round + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + (index: usize) + (zeta_00_ zeta_01_: i32) + = + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re + index + (simd_unit_invert_ntt_at_layer_1_ (re.[ index ] + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + zeta_00_ + zeta_01_ + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + in + re + +let invert_ntt_at_layer_1_ + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + = + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 0) 3839961l (-3628969l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 1) (-3881060l) (-3019102l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 2) (-1439742l) (-812732l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 3) (-1584928l) 1285669l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 4) 1341330l 1315589l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 5) (-177440l) (-2409325l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 6) (-1851402l) 3159746l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 7) (-3553272l) 189548l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 8) (-1316856l) 759969l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 9) (-210977l) 2389356l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 10) (-3249728l) 1653064l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 11) (-8578l) (-3724342l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 12) 3958618l 904516l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 13) (-1100098l) 44288l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 14) 3097992l 508951l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 15) 264944l (-3343383l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 16) (-1430430l) 1852771l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 17) 1349076l (-381987l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 18) (-1308169l) (-22981l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 19) (-1228525l) (-671102l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 20) (-2477047l) (-411027l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 21) (-3693493l) (-2967645l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 22) 2715295l 2147896l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 23) (-983419l) 3412210l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 24) 126922l (-3632928l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 25) (-3157330l) (-3190144l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 26) (-1000202l) (-4083598l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 27) 1939314l (-1257611l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 28) (-1585221l) 2176455l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 29) 3475950l (-1452451l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 30) (-3041255l) (-3677745l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1___round re (sz 31) (-1528703l) (-3930395l) + in + re + +let simd_unit_invert_ntt_at_layer_2_ + (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + (zeta: i32) + = + let a_minus_b:i32 = + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 4 ] <: i32) -! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 0 ] <: i32) + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 0) + ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 0 ] <: i32) +! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 4 ] <: i32) + <: + i32) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 4) + (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta <: i32 + ) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let a_minus_b:i32 = + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 5 ] <: i32) -! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 1 ] <: i32) + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 1) + ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 1 ] <: i32) +! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 5 ] <: i32) + <: + i32) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 5) + (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta <: i32 + ) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let a_minus_b:i32 = + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 6 ] <: i32) -! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 2 ] <: i32) + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 2) + ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 2 ] <: i32) +! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 6 ] <: i32) + <: + i32) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 6) + (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta <: i32 + ) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let a_minus_b:i32 = + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 7 ] <: i32) -! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 3 ] <: i32) + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 3) + ((simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 3 ] <: i32) +! + (simd_unit.Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients.[ sz 7 ] <: i32) + <: + i32) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + let simd_unit:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + { + simd_unit with + Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize simd_unit + .Libcrux_ml_dsa.Simd.Portable.Vector_type.f_coefficients + (sz 7) + (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_fe_by_fer a_minus_b zeta <: i32 + ) + } + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + in + simd_unit + +let invert_ntt_at_layer_2___round + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + (index: usize) + (zeta1: i32) + = + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re + index + (simd_unit_invert_ntt_at_layer_2_ (re.[ index ] + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + zeta1 + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + in + re + +let invert_ntt_at_layer_2_ + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + = + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 0) (-2797779l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 1) 2071892l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 2) (-2556880l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 3) 3900724l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 4) 3881043l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 5) 954230l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 6) 531354l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 7) 811944l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 8) 3699596l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 9) (-1600420l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 10) (-2140649l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 11) 3507263l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 12) (-3821735l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 13) 3505694l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 14) (-1643818l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 15) (-1699267l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 16) (-539299l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 17) 2348700l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 18) (-300467l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 19) 3539968l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 20) (-2867647l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 21) 3574422l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 22) (-3043716l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 23) (-3861115l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 24) 3915439l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 25) (-2537516l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 26) (-3592148l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 27) (-1661693l) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 28) 3530437l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 29) 3077325l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 30) 95776l + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2___round re (sz 31) 2706023l + in + re + +let outer_3_plus + (v_OFFSET v_STEP_BY: usize) + (v_ZETA: i32) + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + = + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + Rust_primitives.Hax.Folds.fold_range v_OFFSET + (v_OFFSET +! v_STEP_BY <: usize) + (fun re temp_1_ -> + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = re in + let _:usize = temp_1_ in + true) + re + (fun re j -> + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = re in + let j:usize = j in + let a_minus_b:Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit = + Libcrux_ml_dsa.Simd.Portable.Arithmetic.subtract (re.[ j +! v_STEP_BY <: usize ] + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + (re.[ j ] <: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re + j + (Libcrux_ml_dsa.Simd.Portable.Arithmetic.add (re.[ j ] + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + (re.[ j +! v_STEP_BY <: usize ] + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re + (j +! v_STEP_BY <: usize) + (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_by_constant a_minus_b + v_ZETA + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + in + re) + in + let hax_temp_output:Prims.unit = () <: Prims.unit in + re + +let invert_ntt_at_layer_3_ + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + = + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 0) (sz 1) 280005l re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 2) (sz 1) 4010497l re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 4) (sz 1) (-19422l) re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 6) (sz 1) 1757237l re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 8) (sz 1) (-3277672l) re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 10) (sz 1) (-1399561l) re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 12) (sz 1) (-3859737l) re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 14) (sz 1) (-2118186l) re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 16) (sz 1) (-2108549l) re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 18) (sz 1) 2619752l re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 20) (sz 1) (-1119584l) re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 22) (sz 1) (-549488l) re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 24) (sz 1) 3585928l re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 26) (sz 1) (-1079900l) re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 28) (sz 1) 1024112l re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 30) (sz 1) 2725464l re + in + re + +let invert_ntt_at_layer_4_ + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + = + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 0) (sz 2) 2680103l re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 4) (sz 2) 3111497l re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 8) (sz 2) (-2884855l) re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 12) (sz 2) 3119733l re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 16) (sz 2) (-2091905l) re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 20) (sz 2) (-359251l) re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 24) (sz 2) 2353451l re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 28) (sz 2) 1826347l re + in + re + +let invert_ntt_at_layer_5_ + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + = + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 0) (sz 4) 466468l re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 8) (sz 4) (-876248l) re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 16) (sz 4) (-777960l) re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 24) (sz 4) 237124l re + in + re + +let invert_ntt_at_layer_6_ + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + = + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 0) (sz 8) (-518909l) re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 16) (sz 8) (-2608894l) re + in + re + +let invert_ntt_at_layer_7_ + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + = + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + outer_3_plus (sz 0) (sz 16) 25847l re + in + re + +let invert_ntt_montgomery + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + = + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_0_ re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_1_ re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_2_ re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_3_ re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_4_ re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_5_ re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_6_ re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + invert_ntt_at_layer_7_ re + in + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = + Rust_primitives.Hax.Folds.fold_range (sz 0) + (Core.Slice.impl__len #Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + (re <: t_Slice Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + <: + usize) + (fun re temp_1_ -> + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = re in + let _:usize = temp_1_ in + true) + re + (fun re i -> + let re:t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32) = re in + let i:usize = i in + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize re + i + (Libcrux_ml_dsa.Simd.Portable.Arithmetic.montgomery_multiply_by_constant (re.[ i ] + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + 41978l + <: + Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + <: + t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + in + re diff --git a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Portable.Invntt.fsti b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Portable.Invntt.fsti new file mode 100644 index 000000000..341dd3468 --- /dev/null +++ b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Portable.Invntt.fsti @@ -0,0 +1,131 @@ +module Libcrux_ml_dsa.Simd.Portable.Invntt +#set-options "--fuel 0 --ifuel 1 --z3rlimit 100" +open Core +open FStar.Mul + +let invert_ntt_at_layer_3___STEP: usize = sz 8 + +let invert_ntt_at_layer_3___STEP_BY: usize = sz 1 + +let invert_ntt_at_layer_4___STEP: usize = sz 16 + +let invert_ntt_at_layer_4___STEP_BY: usize = sz 2 + +let invert_ntt_at_layer_5___STEP: usize = sz 32 + +let invert_ntt_at_layer_5___STEP_BY: usize = sz 4 + +let invert_ntt_at_layer_6___STEP: usize = sz 64 + +let invert_ntt_at_layer_6___STEP_BY: usize = sz 8 + +let invert_ntt_at_layer_7___STEP: usize = sz 128 + +let invert_ntt_at_layer_7___STEP_BY: usize = sz 16 + +val simd_unit_invert_ntt_at_layer_0_ + (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + (zeta0 zeta1 zeta2 zeta3: i32) + : Prims.Pure Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + Prims.l_True + (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_0___round + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + (index: usize) + (zeta0 zeta1 zeta2 zeta3: i32) + : Prims.Pure (t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + Prims.l_True + (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_0_ + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + : Prims.Pure (t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + Prims.l_True + (fun _ -> Prims.l_True) + +val simd_unit_invert_ntt_at_layer_1_ + (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + (zeta0 zeta1: i32) + : Prims.Pure Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + Prims.l_True + (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_1___round + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + (index: usize) + (zeta_00_ zeta_01_: i32) + : Prims.Pure (t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + Prims.l_True + (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_1_ + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + : Prims.Pure (t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + Prims.l_True + (fun _ -> Prims.l_True) + +val simd_unit_invert_ntt_at_layer_2_ + (simd_unit: Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit) + (zeta: i32) + : Prims.Pure Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit + Prims.l_True + (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_2___round + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + (index: usize) + (zeta1: i32) + : Prims.Pure (t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + Prims.l_True + (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_2_ + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + : Prims.Pure (t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + Prims.l_True + (fun _ -> Prims.l_True) + +val outer_3_plus + (v_OFFSET v_STEP_BY: usize) + (v_ZETA: i32) + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + : Prims.Pure (t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + Prims.l_True + (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_3_ + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + : Prims.Pure (t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + Prims.l_True + (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_4_ + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + : Prims.Pure (t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + Prims.l_True + (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_5_ + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + : Prims.Pure (t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + Prims.l_True + (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_6_ + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + : Prims.Pure (t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + Prims.l_True + (fun _ -> Prims.l_True) + +val invert_ntt_at_layer_7_ + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + : Prims.Pure (t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + Prims.l_True + (fun _ -> Prims.l_True) + +val invert_ntt_montgomery + (re: t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + : Prims.Pure (t_Array Libcrux_ml_dsa.Simd.Portable.Vector_type.t_PortableSIMDUnit (sz 32)) + Prims.l_True + (fun _ -> Prims.l_True) From c967919cafbb9ece765388a677691a405f2f2185 Mon Sep 17 00:00:00 2001 From: Jonas Schneider-Bensch Date: Thu, 14 Nov 2024 15:53:01 +0100 Subject: [PATCH 5/5] Remove obsolete .fst --- .../fstar/extraction/Libcrux_ml_dsa.Simd.Traits.fst | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Traits.fst diff --git a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Traits.fst b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Traits.fst deleted file mode 100644 index 5bf547714..000000000 --- a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Simd.Traits.fst +++ /dev/null @@ -1,11 +0,0 @@ -module Libcrux_ml_dsa.Simd.Traits -#set-options "--fuel 0 --ifuel 1 --z3rlimit 100" -open Core -open FStar.Mul - -let montgomery_multiply_by_fer - (#v_S: Type0) - (#[FStar.Tactics.Typeclasses.tcresolve ()] i1: t_Operations v_S) - (simd_unit: v_S) - (fer: i32) - = f_montgomery_multiply_by_constant #v_S #FStar.Tactics.Typeclasses.solve simd_unit fer