diff --git a/src/fp.rs b/src/fp.rs
index 1a25fdf7..4dd893e2 100644
--- a/src/fp.rs
+++ b/src/fp.rs
@@ -9,12 +9,63 @@ use subtle::{Choice, ConditionallySelectable, ConstantTimeEq, CtOption};
 
 use crate::util::{adc, mac, sbb};
 
+mod magnitudes;
+
+pub use magnitudes::*;
+
 // The internal representation of this type is six 64-bit unsigned
 // integers in little-endian order. `Fp` values are always in
 // Montgomery form; i.e., Scalar(a) = aR mod p, with R = 2^384.
 #[derive(Copy, Clone)]
 pub struct Fp([u64; 6]);
 
+/// A double-width value: twelve 64-bit limbs in little-endian order, as
+/// produced by `Fp::mul_unreduced` / `Fp::square_unreduced` and consumed by
+/// `Fp::montgomery_reduce`.
+#[derive(Debug)]
+pub struct FpWide([u64; 12]);
+
+impl FpWide {
+    /// Adds two wide values limb by limb. The carry out of the top limb is
+    /// discarded, so callers must keep the true sum below 2^768.
+    pub const fn add(&self, rhs: &FpWide) -> FpWide {
+        let (d0, carry) = adc(self.0[0], rhs.0[0], 0);
+        let (d1, carry) = adc(self.0[1], rhs.0[1], carry);
+        let (d2, carry) = adc(self.0[2], rhs.0[2], carry);
+        let (d3, carry) = adc(self.0[3], rhs.0[3], carry);
+        let (d4, carry) = adc(self.0[4], rhs.0[4], carry);
+        let (d5, carry) = adc(self.0[5], rhs.0[5], carry);
+        let (d6, carry) = adc(self.0[6], rhs.0[6], carry);
+        let (d7, carry) = adc(self.0[7], rhs.0[7], carry);
+        let (d8, carry) = adc(self.0[8], rhs.0[8], carry);
+        let (d9, carry) = adc(self.0[9], rhs.0[9], carry);
+        let (d10, carry) = adc(self.0[10], rhs.0[10], carry);
+        let (d11, _) = adc(self.0[11], rhs.0[11], carry);
+
+        FpWide([d0, d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11])
+    }
+
+    /// Negates an element of magnitude 1 by subtracting it from a multiple of
+    /// the prime that is larger than this element could be (p^2), increasing
+    /// the magnitude by 1.
+    pub const fn negate(&self) -> FpWide {
+        let (d0, borrow) = sbb(<U1 as Magnitude>::P2[0], self.0[0], 0);
+        let (d1, borrow) = sbb(<U1 as Magnitude>::P2[1], self.0[1], borrow);
+        let (d2, borrow) = sbb(<U1 as Magnitude>::P2[2], self.0[2], borrow);
+        let (d3, borrow) = sbb(<U1 as Magnitude>::P2[3], self.0[3], borrow);
+        let (d4, borrow) = sbb(<U1 as Magnitude>::P2[4], self.0[4], borrow);
+        let (d5, borrow) = sbb(<U1 as Magnitude>::P2[5], self.0[5], borrow);
+        let (d6, borrow) = sbb(<U1 as Magnitude>::P2[6], self.0[6], borrow);
+        let (d7, borrow) = sbb(<U1 as Magnitude>::P2[7], self.0[7], borrow);
+        let (d8, borrow) = sbb(<U1 as Magnitude>::P2[8], self.0[8], borrow);
+        let (d9, borrow) = sbb(<U1 as Magnitude>::P2[9], self.0[9], borrow);
+        let (d10, borrow) = sbb(<U1 as Magnitude>::P2[10], self.0[10], borrow);
+        let (d11, _) = sbb(<U1 as Magnitude>::P2[11], self.0[11], borrow);
+
+        FpWide([d0, d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11])
+    }
+}
+
 impl fmt::Debug for Fp {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         let tmp = self.to_bytes();
@@ -199,9 +250,9 @@ impl Fp {
     pub fn to_bytes(&self) -> [u8; 48] {
         // Turn into canonical form by computing
         // (a.R) / R = a
-        let tmp = Fp::montgomery_reduce(
+        let tmp = Fp::montgomery_reduce(FpWide([
             self.0[0], self.0[1], self.0[2], self.0[3], self.0[4], self.0[5], 0, 0, 0, 0, 0, 0,
-        );
+        ]));
 
         let mut res = [0; 48];
         res[0..8].copy_from_slice(&tmp.0[5].to_be_bytes());
@@ -223,9 +274,9 @@ impl Fp {
         // (p - 1) // 2.
 
         // First, because self is in Montgomery form we need to reduce it
-        let tmp = Fp::montgomery_reduce(
+        let tmp = Fp::montgomery_reduce(FpWide([
             self.0[0], self.0[1], self.0[2], self.0[3], self.0[4], self.0[5], 0, 0, 0, 0, 0, 0,
-        );
+        ]));
 
         let (_, borrow) = sbb(tmp.0[0], 0xdcff7fffffffd556, 0);
         let (_, borrow) = sbb(tmp.0[1], 0x0f55ffff58a9ffff, borrow);
@@ -369,32 +420,21 @@ impl Fp {
     }
 
+    /// Montgomery-reduces a twelve-limb value into an `Fp`. It is public so
+    /// that several unreduced products can be summed and reduced only once.
     #[inline(always)]
-    const fn montgomery_reduce(
-        t0: u64,
-        t1: u64,
-        t2: u64,
-        t3: u64,
-        t4: u64,
-        t5: u64,
-        t6: u64,
-        t7: u64,
-        t8: u64,
-        t9: u64,
-        t10: u64,
-        t11: u64,
-    ) -> Self {
+    pub const fn montgomery_reduce(t: FpWide) -> Self {
         // The Montgomery reduction here is based on Algorithm 14.32 in
         // Handbook of Applied Cryptography
         // <http://cacr.uwaterloo.ca/hac/about/chap14.pdf>.
 
-        let k = t0.wrapping_mul(INV);
-        let (_, carry) = mac(t0, k, MODULUS[0], 0);
-        let (r1, carry) = mac(t1, k, MODULUS[1], carry);
-        let (r2, carry) = mac(t2, k, MODULUS[2], carry);
-        let (r3, carry) = mac(t3, k, MODULUS[3], carry);
-        let (r4, carry) = mac(t4, k, MODULUS[4], carry);
-        let (r5, carry) = mac(t5, k, MODULUS[5], carry);
-        let (r6, r7) = adc(t6, 0, carry);
+        let k = t.0[0].wrapping_mul(INV);
+        let (_, carry) = mac(t.0[0], k, MODULUS[0], 0);
+        let (r1, carry) = mac(t.0[1], k, MODULUS[1], carry);
+        let (r2, carry) = mac(t.0[2], k, MODULUS[2], carry);
+        let (r3, carry) = mac(t.0[3], k, MODULUS[3], carry);
+        let (r4, carry) = mac(t.0[4], k, MODULUS[4], carry);
+        let (r5, carry) = mac(t.0[5], k, MODULUS[5], carry);
+        let (r6, r7) = adc(t.0[6], 0, carry);
 
         let k = r1.wrapping_mul(INV);
         let (_, carry) = mac(r1, k, MODULUS[0], 0);
@@ -403,7 +443,7 @@ impl Fp {
         let (r4, carry) = mac(r4, k, MODULUS[3], carry);
         let (r5, carry) = mac(r5, k, MODULUS[4], carry);
         let (r6, carry) = mac(r6, k, MODULUS[5], carry);
-        let (r7, r8) = adc(t7, r7, carry);
+        let (r7, r8) = adc(t.0[7], r7, carry);
 
         let k = r2.wrapping_mul(INV);
         let (_, carry) = mac(r2, k, MODULUS[0], 0);
@@ -412,7 +452,7 @@ impl Fp {
         let (r5, carry) = mac(r5, k, MODULUS[3], carry);
         let (r6, carry) = mac(r6, k, MODULUS[4], carry);
         let (r7, carry) = mac(r7, k, MODULUS[5], carry);
-        let (r8, r9) = adc(t8, r8, carry);
+        let (r8, r9) = adc(t.0[8], r8, carry);
 
         let k = r3.wrapping_mul(INV);
         let (_, carry) = mac(r3, k, MODULUS[0], 0);
@@ -421,7 +461,7 @@ impl Fp {
         let (r6, carry) = mac(r6, k, MODULUS[3], carry);
         let (r7, carry) = mac(r7, k, MODULUS[4], carry);
         let (r8, carry) = mac(r8, k, MODULUS[5], carry);
-        let (r9, r10) = adc(t9, r9, carry);
+        let (r9, r10) = adc(t.0[9], r9, carry);
 
         let k = r4.wrapping_mul(INV);
         let (_, carry) = mac(r4, k, MODULUS[0], 0);
@@ -430,7 +470,7 @@ impl Fp {
         let (r7, carry) = mac(r7, k, MODULUS[3], carry);
         let (r8, carry) = mac(r8, k, MODULUS[4], carry);
         let (r9, carry) = mac(r9, k, MODULUS[5], carry);
-        let (r10, r11) = adc(t10, r10, carry);
+        let (r10, r11) = adc(t.0[10], r10, carry);
 
         let k = r5.wrapping_mul(INV);
         let (_, carry) = mac(r5, k, MODULUS[0], 0);
@@ -439,7 +479,7 @@ impl Fp {
         let (r8, carry) = mac(r8, k, MODULUS[3], carry);
         let (r9, carry) = mac(r9, k, MODULUS[4], carry);
         let (r10, carry) = mac(r10, k, MODULUS[5], carry);
-        let (r11, _) = adc(t11, r11, carry);
+        let (r11, _) = adc(t.0[11], r11, carry);
 
         // Attempt to subtract the modulus, to ensure the value
         // is smaller than the modulus.
@@ -448,6 +488,13 @@ impl Fp {
 
     #[inline]
     pub const fn mul(&self, rhs: &Fp) -> Fp {
+        Self::montgomery_reduce(self.mul_unreduced(rhs))
+    }
+
+    /// Multiplies two elements, returning the full twelve-limb product
+    /// without Montgomery reduction.
+    #[inline]
+    pub const fn mul_unreduced(&self, rhs: &Fp) -> FpWide {
         let (t0, carry) = mac(0, self.0[0], rhs.0[0], 0);
         let (t1, carry) = mac(0, self.0[0], rhs.0[1], carry);
         let (t2, carry) = mac(0, self.0[0], rhs.0[2], carry);
@@ -490,12 +537,18 @@ impl Fp {
         let (t9, carry) = mac(t9, self.0[5], rhs.0[4], carry);
         let (t10, t11) = mac(t10, self.0[5], rhs.0[5], carry);
 
-        Self::montgomery_reduce(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
+        FpWide([t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11])
     }
 
     /// Squares this element.
     #[inline]
     pub const fn square(&self) -> Self {
+        Self::montgomery_reduce(self.square_unreduced())
+    }
+
+    /// Squares this element, returning the wide value.
+    #[inline]
+    pub const fn square_unreduced(&self) -> FpWide {
         let (t1, carry) = mac(0, self.0[0], self.0[1], 0);
         let (t2, carry) = mac(0, self.0[0], self.0[2], carry);
         let (t3, carry) = mac(0, self.0[0], self.0[3], carry);
@@ -541,7 +594,7 @@ impl Fp {
         let (t10, carry) = mac(t10, self.0[5], self.0[5], carry);
         let (t11, _) = adc(t11, 0, carry);
 
-        Self::montgomery_reduce(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
+        FpWide([t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11])
     }
 }
 
diff --git a/src/fp/magnitudes.rs b/src/fp/magnitudes.rs
new file mode 100644
index 00000000..185013fc
--- /dev/null
+++ b/src/fp/magnitudes.rs
@@ -0,0 +1,240 @@
+#![allow(dead_code)]
+
+// Marker types naming the magnitudes 1 through 9.
+pub struct U1;
+pub struct U2;
+pub struct U3;
+pub struct U4;
+pub struct U5;
+pub struct U6;
+pub struct U7;
+pub struct U8;
+pub struct U9;
+
+/// Constants associated with a magnitude marker type.
+pub trait Magnitude {
+    /// The magnitude times the modulus `p`, as six little-endian limbs.
+    const P: [u64; 6];
+    /// The magnitude times `p^2`, as twelve little-endian limbs.
+    const P2: [u64; 12];
+    /// The magnitude itself as an integer.
+    const U64: u64;
+}
+impl Magnitude for U1 {
+    const P: [u64; 6] = super::MODULUS;
+    const P2: [u64; 12] = [
+        0x26aa00001c718e39,
+        0x7ced6b1d76382eab,
+        0x162c338362113cfd,
+        0x66bf91ed3e71b743,
+        0x292e85a87091a049,
+        0x1d68619c86185c7b,
+        0xf53149330978ef01,
+        0x50a62cfd16ddca6e,
+        0x66e59e49349e8bd0,
+        0xe2dc90e50e7046b4,
+        0x4bd278eaa22f25e9,
+        0x2a437a4b8c35fc7,
+    ];
+    const U64: u64 = 1;
+}
+impl Magnitude for U2 {
+    const P: [u64; 6] = [
+        0x73fdffffffff5556,
+        0x3d57fffd62a7ffff,
+        0xce61a541ed61ec48,
+        0xc8ee9709e70a257e,
+        0x96374f6c869759ae,
+        0x340223d472ffcd34,
+    ];
+    const P2: [u64; 12] = [
+        0x4d54000038e31c72,
+        0xf9dad63aec705d56,
+        0x2c586706c42279fa,
+        0xcd7f23da7ce36e86,
+        0x525d0b50e1234092,
+        0x3ad0c3390c30b8f6,
+        0xea62926612f1de02,
+        0xa14c59fa2dbb94dd,
+        0xcdcb3c92693d17a0,
+        0xc5b921ca1ce08d68,
+        0x97a4f1d5445e4bd3,
+        0x5486f497186bf8e,
+    ];
+    const U64: u64 = 2;
+}
+impl Magnitude for U3 {
+    const P: [u64; 6] = [
+        0x2dfcffffffff0001,
+        0x5c03fffc13fbffff,
+        0x359277e2e412e26c,
+        0x2d65e28eda8f383e,
+        0xe152f722c9e30686,
+        0x4e0335beac7fb3ce,
+    ];
+    const P2: [u64; 12] = [
+        0x73fe00005554aaab,
+        0x76c8415862a88c01,
+        0x42849a8a2633b6f8,
+        0x343eb5c7bb5525c9,
+        0x7b8b90f951b4e0dc,
+        0x583924d592491571,
+        0xdf93db991c6acd03,
+        0xf1f286f744995f4c,
+        0x34b0dadb9ddba370,
+        0xa895b2af2b50d41d,
+        0xe3776abfe68d71bd,
+        0x7eca6ee2a4a1f55,
+    ];
+    const U64: u64 = 3;
+}
+impl Magnitude for U4 {
+    const P: [u64; 6] = [
+        0xe7fbfffffffeaaac,
+        0x7aaffffac54ffffe,
+        0x9cc34a83dac3d890,
+        0x91dd2e13ce144afd,
+        0x2c6e9ed90d2eb35d,
+        0x680447a8e5ff9a69,
+    ];
+    const P2: [u64; 12] = [
+        0x9aa8000071c638e4,
+        0xf3b5ac75d8e0baac,
+        0x58b0ce0d8844f3f5,
+        0x9afe47b4f9c6dd0c,
+        0xa4ba16a1c2468125,
+        0x75a18672186171ec,
+        0xd4c524cc25e3bc04,
+        0x4298b3f45b7729bb,
+        0x9b967924d27a2f41,
+        0x8b72439439c11ad1,
+        0x2f49e3aa88bc97a7,
+        0xa90de92e30d7f1d,
+    ];
+    const U64: u64 = 4;
+}
+impl Magnitude for U5 {
+    const P: [u64; 6] = [
+        0xa1fafffffffe5557,
+        0x995bfff976a3fffe,
+        0x03f41d24d174ceb4,
+        0xf6547998c1995dbd,
+        0x778a468f507a6034,
+        0x820559931f7f8103,
+    ];
+    const P2: [u64; 12] = [
+        0xc15200008e37c71d,
+        0x70a317934f18e957,
+        0x6edd0190ea5630f3,
+        0x1bdd9a23838944f,
+        0xcde89c4a32d8216f,
+        0x9309e80e9e79ce67,
+        0xc9f66dff2f5cab05,
+        0x933ee0f17254f42a,
+        0x27c176e0718bb11,
+        0x6e4ed47948316186,
+        0x7b1c5c952aebbd91,
+        0xd3516379bd0dee4,
+    ];
+    const U64: u64 = 5;
+}
+impl Magnitude for U6 {
+    const P: [u64; 6] = [
+        0x5bf9fffffffe0002,
+        0xb807fff827f7fffe,
+        0x6b24efc5c825c4d8,
+        0x5acbc51db51e707c,
+        0xc2a5ee4593c60d0c,
+        0x9c066b7d58ff679d,
+    ];
+    const P2: [u64; 12] = [
+        0xe7fc0000aaa95556,
+        0xed9082b0c5511802,
+        0x850935144c676df0,
+        0x687d6b8f76aa4b92,
+        0xf71721f2a369c1b8,
+        0xb07249ab24922ae2,
+        0xbf27b73238d59a06,
+        0xe3e50dee8932be99,
+        0x6961b5b73bb746e1,
+        0x512b655e56a1a83a,
+        0xc6eed57fcd1ae37b,
+        0xfd94ddc54943eab,
+    ];
+    const U64: u64 = 6;
+}
+impl Magnitude for U7 {
+    const P: [u64; 6] = [
+        0x15f8fffffffdaaad,
+        0xd6b3fff6d94bfffe,
+        0xd255c266bed6bafc,
+        0xbf4310a2a8a3833b,
+        0x0dc195fbd711b9e3,
+        0xb6077d67927f4e38,
+    ];
+    const P2: [u64; 12] = [
+        0xea60000c71ae38f,
+        0x6a7dedce3b8946ae,
+        0x9b356897ae78aaee,
+        0xcf3cfd7cb51c02d5,
+        0x2045a79b13fb6201,
+        0xcddaab47aaaa875e,
+        0xb4590065424e8907,
+        0x348b3aeba0108908,
+        0xd04754007055d2b2,
+        0x3407f6436511eeee,
+        0x12c14e6a6f4a0965,
+        0x127d85810d579e73,
+    ];
+    const U64: u64 = 7;
+}
+impl Magnitude for U8 {
+    const P: [u64; 6] = [
+        0xcff7fffffffd5558,
+        0xf55ffff58a9ffffd,
+        0x39869507b587b120,
+        0x23ba5c279c2895fb,
+        0x58dd3db21a5d66bb,
+        0xd0088f51cbff34d2,
+    ];
+    const P2: [u64; 12] = [
+        0x35500000e38c71c8,
+        0xe76b58ebb1c17559,
+        0xb1619c1b1089e7eb,
+        0x35fc8f69f38dba18,
+        0x49742d43848d024b,
+        0xeb430ce430c2e3d9,
+        0xa98a49984bc77808,
+        0x853167e8b6ee5377,
+        0x372cf249a4f45e82,
+        0x16e48728738235a3,
+        0x5e93c75511792f4f,
+        0x1521bd25c61afe3a,
+    ];
+    const U64: u64 = 8;
+}
+impl Magnitude for U9 {
+    const P: [u64; 6] = [
+        0x89f6fffffffd0003,
+        0x140bfff43bf3fffd,
+        0xa0b767a8ac38a745,
+        0x8831a7ac8fada8ba,
+        0xa3f8e5685da91392,
+        0xea09a13c057f1b6c,
+    ];
+    const P2: [u64; 12] = [
+        0x5bfa0000fffe0001,
+        0x6458c40927f9a404,
+        0xc78dcf9e729b24e9,
+        0x9cbc215731ff715b,
+        0x72a2b2ebf51ea294,
+        0x8ab6e80b6db4054,
+        0x9ebb92cb5540670a,
+        0xd5d794e5cdcc1de6,
+        0x9e129092d992ea52,
+        0xf9c1180d81f27c57,
+        0xaa66403fb3a85538,
+        0x17c5f4ca7ede5e01,
+    ];
+    const U64: u64 = 9;
+}
diff --git a/src/fp2.rs b/src/fp2.rs
index 4cd0a23a..ff3c9065 100644
--- a/src/fp2.rs
+++ b/src/fp2.rs
@@ -209,14 +209,18 @@ impl Fp2 {
         // c0 = v0 + v1
         // c1 = (a0 + a1) * (b0 + b1) - v0 + v1
 
-        let v0 = (&self.c0).mul(&rhs.c0);
-        let v1 = (&(&self.c1).neg()).mul(&rhs.c1);
-        let c0 = (&v0).add(&v1);
-        let c1 = (&(&self.c0).add(&self.c1)).mul(&(&rhs.c0).add(&rhs.c1));
-        let c1 = (&c1).sub(&v0);
-        let c1 = (&c1).add(&v1);
-
-        Fp2 { c0, c1 }
+        let v0 = (&self.c0).mul_unreduced(&rhs.c0); // v0 has magnitude 1
+        let v1 = (&(&self.c1).neg()).mul_unreduced(&rhs.c1); // v1 has magnitude 1
+        let c0 = v0.add(&v1); // c0 has magnitude 2
+        let c1 = (&(&self.c0).add(&self.c1)).mul_unreduced(&(&rhs.c0).add(&rhs.c1)); // c1 has magnitude 1
+        let c1 = c1.add(&v1); // c1 has magnitude 2
+        let v0 = v0.negate(); // v0 has magnitude 2 (it could have been zero)
+        let c1 = c1.add(&v0); // c1 has magnitude 4
+
+        Fp2 {
+            c0: Fp::montgomery_reduce(c0),
+            c1: Fp::montgomery_reduce(c1),
+        }
     }
 
     pub const fn add(&self, rhs: &Fp2) -> Fp2 {